5 changes: 5 additions & 0 deletions docs/changelog/128735.yaml
@@ -0,0 +1,5 @@
pr: 128735
summary: Add option to include or exclude vectors from `_source` retrieval
area: Vector Search
type: feature
issues: []
@@ -0,0 +1,176 @@
setup:
  - requires:
      reason: 'include_vectors option is required'
      test_runner_features: [ capabilities ]
      capabilities:
        - method: GET
          path: /_search
          capabilities: [ include_vectors_param ]
  - skip:
      features: "headers"

  - do:
      indices.create:
        index: test
        body:
          mappings:
            properties:
              name:
                type: keyword
              sparse_vector:
                type: sparse_vector
              vector:
                type: dense_vector
                dims: 5
                similarity: l2_norm

              nested:
                type: nested
                properties:
                  paragraph_id:
                    type: keyword
                  vector:
                    type: dense_vector
                    dims: 5
                    similarity: l2_norm
                  sparse_vector:
                    type: sparse_vector

  - do:
      index:
        index: test
        id: "1"
        body:
          name: cow.jpg
          vector: [36, 267, -311, 12, -202]

  - do:
      index:
        index: test
        id: "2"
        body:
          name: moose.jpg
          nested:
            - paragraph_id: 0
              vector: [-0.5, 100.0, -13, 14.8, -156.0]
            - paragraph_id: 2
              vector: [0, 100.0, 0, 14.8, -156.0]
            - paragraph_id: 3
              vector: [0, 1.0, 0, 1.8, -15.0]

  - do:
      index:
        index: test
        id: "3"
        body:
          name: rabbit.jpg
          vector: [-0.5, 100.0, -13, 14.8, -156.0]
          sparse_vector:
            running: 3
            good: 17
            run: 22

  - do:
      index:
        index: test
        id: "4"
        body:
          name: zoolander.jpg
          nested:
            - paragraph_id: 0
              vector: [ -0.5, 100.0, -13, 14.8, -156.0 ]
              sparse_vector:
                running: 3
                good: 17
                run: 22
            - paragraph_id: 1
              sparse_vector:
                modeling: 32
                model: 20
                mode: 54
            - paragraph_id: 2
              vector: [ -9.8, 109, 32, 14.8, 23 ]


  - do:
      indices.refresh: {}

---
"exclude vectors":
  - do:
      search:
        index: test
        body:
          _source:
            include_vectors: false
          sort: ["name"]

  - match: { hits.hits.0._id: "1"}
  - match: { hits.hits.0._source.name: "cow.jpg"}
  - not_exists: hits.hits.0._source.vector

  - match: { hits.hits.1._id: "2"}
  - match: { hits.hits.1._source.name: "moose.jpg"}
  - length: { hits.hits.1._source.nested: 3 }
  - not_exists: hits.hits.1._source.nested.0.vector
  - match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
  - not_exists: hits.hits.1._source.nested.1.vector
  - match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
  - not_exists: hits.hits.1._source.nested.2.vector
  - match: { hits.hits.1._source.nested.2.paragraph_id: 3 }

  - match: { hits.hits.2._id: "3" }
  - match: { hits.hits.2._source.name: "rabbit.jpg" }
  - not_exists: hits.hits.2._source.vector
  - not_exists: hits.hits.2._source.sparse_vector

  - match: { hits.hits.3._id: "4" }
  - match: { hits.hits.3._source.name: "zoolander.jpg" }
  - length: { hits.hits.3._source.nested: 3 }
  - not_exists: hits.hits.3._source.nested.0.vector
  - not_exists: hits.hits.3._source.nested.0.sparse_vector
  - match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
  - not_exists: hits.hits.3._source.nested.1.sparse_vector
  - match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
  - not_exists: hits.hits.3._source.nested.2.vector
  - match: { hits.hits.3._source.nested.2.paragraph_id: 2 }

---
"include vectors":
  - do:
      search:
        index: test
        body:
          _source:
            include_vectors: true
          sort: ["name"]

  - match: { hits.hits.0._id: "1"}
  - match: { hits.hits.0._source.name: "cow.jpg"}
  - exists: hits.hits.0._source.vector

  - match: { hits.hits.1._id: "2"}
  - match: { hits.hits.1._source.name: "moose.jpg"}
  - length: { hits.hits.1._source.nested: 3 }
  - exists: hits.hits.1._source.nested.0.vector
  - match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
  - exists: hits.hits.1._source.nested.1.vector
  - match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
  - exists: hits.hits.1._source.nested.2.vector
  - match: { hits.hits.1._source.nested.2.paragraph_id: 3 }

  - match: { hits.hits.2._id: "3" }
  - match: { hits.hits.2._source.name: "rabbit.jpg" }
  - exists: hits.hits.2._source.vector
  - exists: hits.hits.2._source.sparse_vector

  - match: { hits.hits.3._id: "4" }
  - match: { hits.hits.3._source.name: "zoolander.jpg" }
  - length: { hits.hits.3._source.nested: 3 }
  - exists: hits.hits.3._source.nested.0.vector
  - exists: hits.hits.3._source.nested.0.sparse_vector
  - match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
  - exists: hits.hits.3._source.nested.1.sparse_vector
  - match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
  - exists: hits.hits.3._source.nested.2.vector
  - match: { hits.hits.3._source.nested.2.paragraph_id: 2 }
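For reference, the behavior exercised by the "exclude vectors" test above corresponds to an ordinary search request whose `_source` section sets `include_vectors: false`. The sketch below issues that request with the low-level Java REST client; the `test` index name matches the test setup, while the localhost endpoint and client bootstrap are illustrative assumptions.

import org.apache.http.HttpHost;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

public class ExcludeVectorsFromSourceExample {
    public static void main(String[] args) throws Exception {
        // Assumes a node is reachable on localhost:9200; adjust for your environment.
        try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
            Request request = new Request("POST", "/test/_search");
            // Same body as the "exclude vectors" test: dense_vector and sparse_vector fields
            // are stripped from the returned _source, while other fields come back as usual.
            request.setJsonEntity("""
                {
                  "_source": { "include_vectors": false },
                  "sort": [ "name" ]
                }
                """);
            Response response = client.performRequest(request);
            System.out.println(EntityUtils.toString(response.getEntity()));
        }
    }
}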
@@ -184,6 +184,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ML_INFERENCE_SAGEMAKER_CHAT_COMPLETION_8_19 = def(8_841_0_37);
public static final TransportVersion ML_INFERENCE_VERTEXAI_CHATCOMPLETION_ADDED_8_19 = def(8_841_0_38);
public static final TransportVersion INFERENCE_CUSTOM_SERVICE_ADDED_8_19 = def(8_841_0_39);
public static final TransportVersion SEARCH_SOURCE_INCLUDE_VECTORS_PARAM_8_19 = def(8_841_0_40);
public static final TransportVersion V_9_0_0 = def(9_000_0_09);
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11);
@@ -273,7 +274,7 @@ static TransportVersion def(int id) {
public static final TransportVersion INFERENCE_CUSTOM_SERVICE_ADDED = def(9_084_0_00);
public static final TransportVersion ESQL_LIMIT_ROW_SIZE = def(9_085_0_00);
public static final TransportVersion ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY = def(9_086_0_00);

public static final TransportVersion SEARCH_SOURCE_INCLUDE_VECTORS_PARAM = def(9_087_0_00);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
@@ -50,6 +50,8 @@ private SearchCapabilities() {}

private static final String SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB = "significant_terms_background_filter_as_sub";

private static final String INCLUDE_VECTORS_PARAM = "include_vectors_param";

public static final Set<String> CAPABILITIES;
static {
HashSet<String> capabilities = new HashSet<>();
@@ -69,6 +71,7 @@ private SearchCapabilities() {}
capabilities.add(HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT);
capabilities.add(INDEX_SELECTOR_SYNTAX);
capabilities.add(SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB);
capabilities.add(INCLUDE_VECTORS_PARAM);
CAPABILITIES = Set.copyOf(capabilities);
}
}
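Registering `include_vectors_param` here is what the REST test's `requires`/`capabilities` block checks before it runs. A client can make the same probe; the sketch below is a rough illustration against the capabilities endpoint, with the `method`, `path`, and `capabilities` parameter names mirrored from the test's `requires` block rather than taken from the API reference, and the localhost endpoint assumed.

import org.apache.http.HttpHost;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

public class IncludeVectorsCapabilityProbe {
    public static void main(String[] args) throws Exception {
        try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
            // Ask whether GET /_search advertises the include_vectors_param capability.
            Request request = new Request("GET", "/_capabilities");
            request.addParameter("method", "GET");
            request.addParameter("path", "/_search");
            request.addParameter("capabilities", "include_vectors_param");
            Response response = client.performRequest(request);
            // Expect a small JSON body indicating whether the listed capabilities are supported.
            System.out.println(EntityUtils.toString(response.getEntity()));
        }
    }
}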
@@ -68,7 +68,9 @@ private static FetchSourceContext buildFetchSourceContext(SearchContext in) {
if (sfc != null && sfc.fetchFields()) {
for (String field : sfc.fieldNames()) {
if (SourceFieldMapper.NAME.equals(field)) {
fsc = fsc == null ? FetchSourceContext.of(true) : FetchSourceContext.of(true, fsc.includes(), fsc.excludes());
fsc = fsc == null
? FetchSourceContext.of(true)
: FetchSourceContext.of(true, fsc.includeVectors(), fsc.includes(), fsc.excludes());
}
}
}
@@ -14,21 +14,26 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.TotalHits;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.IdLoader;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
import org.elasticsearch.search.LeafNestedDocuments;
import org.elasticsearch.search.NestedDocuments;
import org.elasticsearch.search.SearchContextSourcePrinter;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext;
import org.elasticsearch.search.fetch.subphase.InnerHitsPhase;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.search.lookup.SourceFilter;
import org.elasticsearch.search.lookup.SourceProvider;
import org.elasticsearch.search.profile.ProfileResult;
import org.elasticsearch.search.profile.Profilers;
@@ -45,6 +50,7 @@
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
import java.util.stream.Collectors;

/**
* Fetch phase of a search request, used to fetch the actual top matching documents to be returned to the client, identified
@@ -111,7 +117,13 @@ public Source getSource(LeafReaderContext ctx, int doc) {
}

private SearchHits buildSearchHits(SearchContext context, int[] docIdsToLoad, Profiler profiler, RankDocShardInfo rankDocs) {
SourceLoader sourceLoader = context.newSourceLoader(null);
// Optionally remove sparse and dense vector fields early to:
// - Reduce the in-memory size of the source
// - Speed up retrieval of the synthetic source
// Note: These vectors will no longer be accessible via _source for any sub-fetch processors,
// but they are typically accessed through doc values instead (e.g., by a rescorer).
SourceFilter sourceFilter = maybeExcludeNonSemanticTextVectors(context);
SourceLoader sourceLoader = context.newSourceLoader(sourceFilter);
FetchContext fetchContext = new FetchContext(context, sourceLoader);

PreloadedSourceProvider sourceProvider = new PreloadedSourceProvider();
@@ -432,4 +444,39 @@ public String toString() {
}
};
}

    /**
     * Determines whether vector fields should be excluded from the source based on the {@link FetchSourceContext}.
     * Returns {@code true} if vector fields are explicitly marked to be excluded and {@code false} otherwise.
     */
    private static boolean shouldExcludeVectorsFromSource(SearchContext context) {
        if (context.fetchSourceContext() == null) {
            return false;
        }
        return context.fetchSourceContext().includeVectors() != null && context.fetchSourceContext().includeVectors() == false;
    }

    /**
     * Returns a {@link SourceFilter} that excludes vector fields not associated with semantic text fields,
     * unless vectors are explicitly requested to be included in the source.
     * Returns {@code null} when vectors should not be filtered out.
     */
    private static SourceFilter maybeExcludeNonSemanticTextVectors(SearchContext context) {
        if (shouldExcludeVectorsFromSource(context) == false) {
            return null;
        }
        var lookup = context.getSearchExecutionContext().getMappingLookup();
        List<String> inferencePatterns = lookup.inferenceFields().isEmpty() ? null : lookup.inferenceFields().keySet().stream().toList();
        var excludes = lookup.getFullNameToFieldType()
            .values()
            .stream()
            .filter(
                f -> f instanceof DenseVectorFieldMapper.DenseVectorFieldType || f instanceof SparseVectorFieldMapper.SparseVectorFieldType
            )
            // Exclude vectors from semantic text fields, as they are processed separately
            .filter(f -> Regex.simpleMatch(inferencePatterns, f.name()) == false)
            .map(f -> f.name())
            .collect(Collectors.toList());
        return excludes.isEmpty() ? null : new SourceFilter(new String[] {}, excludes.toArray(String[]::new));
    }
}
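To make the intent of `maybeExcludeNonSemanticTextVectors` easier to see in isolation, here is a self-contained sketch of the same idea using plain Java collections: vector-typed fields are collected into an exclude list unless they fall under an inference (semantic_text) field pattern. The field names, the `semantic_field.inference*` pattern, and the naive prefix matcher standing in for `Regex.simpleMatch` are illustrative assumptions, not the production code path.

import java.util.List;
import java.util.Map;

public class VectorExclusionSketch {

    // Naive stand-in for Regex.simpleMatch: a trailing '*' is treated as a prefix wildcard.
    static boolean simpleMatch(List<String> patterns, String field) {
        if (patterns == null) {
            return false;
        }
        for (String pattern : patterns) {
            boolean matches = pattern.endsWith("*")
                ? field.startsWith(pattern.substring(0, pattern.length() - 1))
                : pattern.equals(field);
            if (matches) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        // Illustrative field -> type mapping; the "semantic_field.inference..." entry plays the role
        // of vectors produced by a semantic_text field, which are handled separately.
        Map<String, String> fieldTypes = Map.of(
            "name", "keyword",
            "vector", "dense_vector",
            "sparse_vector", "sparse_vector",
            "semantic_field.inference.chunks.embeddings", "sparse_vector"
        );
        List<String> inferencePatterns = List.of("semantic_field.inference*");

        // Collect vector-typed fields that are not covered by an inference field pattern,
        // mirroring how the exclude list above is built from the mapping lookup.
        List<String> excludes = fieldTypes.entrySet().stream()
            .filter(e -> e.getValue().equals("dense_vector") || e.getValue().equals("sparse_vector"))
            .filter(e -> simpleMatch(inferencePatterns, e.getKey()) == false)
            .map(Map.Entry::getKey)
            .toList();

        // Prints "vector" and "sparse_vector" (iteration order of Map.of is unspecified).
        System.out.println(excludes);
    }
}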