add yml tests for partial updates and get API

jimczi · jimczi · commit fa027430ae09 · 2025-07-01T17:16:23.000+01:00
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/get.json b/rest-api-spec/src/main/resources/rest-api-spec/api/get.json
@@ -68,6 +68,10 @@
         "type":"list",
         "description":"A list of fields to extract and return from the _source field"
       },
+      "_source_exclude_vectors":{
+        "type":"boolean",
+        "description":"Whether vectors should be excluded from _source"
+      },
       "version":{
         "type":"number",
         "description":"Explicit version number for concurrency control"
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/search.json b/rest-api-spec/src/main/resources/rest-api-spec/api/search.json
@@ -155,6 +155,10 @@
         "type":"list",
         "description":"A list of fields to extract and return from the _source field"
       },
+      "_source_exclude_vectors":{
+        "type":"boolean",
+        "description":"Whether vectors should be excluded from _source"
+      },
       "terminate_after":{
         "type":"number",
         "description":"The maximum number of documents to collect for each shard, upon reaching which the query execution will terminate early."
diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_vectors.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_vectors.yml
@@ -21,7 +21,7 @@ setup:
                 type: keyword
               vector:
                 type: dense_vector
-                dims: 5
+                dims: 3
                 similarity: l2_norm
 
               nested:
@@ -31,7 +31,7 @@ setup:
                     type: keyword
                   vector:
                     type: dense_vector
-                    dims: 5
+                    dims: 3
                     similarity: l2_norm
 
   - do:
@@ -40,7 +40,7 @@ setup:
         id: "1"
         body:
           name: cow.jpg
-          vector: [36, 267, -311, 12, -202]
+          vector: [1, 2, 3]
 
   - do:
       index:
@@ -50,19 +50,19 @@ setup:
           name: moose.jpg
           nested:
           - paragraph_id: 0
-            vector: [-0.5, 100.0, -13, 14.8, -156.0]
+            vector: [1, 2, 3]
           - paragraph_id: 2
-            vector: [0, 100.0, 0, 14.8, -156.0]
+            vector: [4, 5, 6]
           - paragraph_id: 3
-            vector: [0, 1.0, 0, 1.8, -15.0]
+            vector: [7, 8, 9]
 
   - do:
       index:
         index: test
         id: "3"
         body:
           name: rabbit.jpg
-          vector: [-0.5, 100.0, -13, 14.8, -156.0]
+          vector: [10, 11, 12]
 
   - do:
       index:
@@ -72,11 +72,10 @@ setup:
           name: zoolander.jpg
           nested:
             - paragraph_id: 0
-              vector: [ -0.5, 100.0, -13, 14.8, -156.0 ]
+              vector: [ 13, 14, 15 ]
             - paragraph_id: 1
             - paragraph_id: 2
-              vector: [ -9.8, 109, 32, 14.8, 23 ]
-
+              vector: [ 16, 17, 18 ]
 
   - do:
       indices.refresh: {}
@@ -148,4 +147,134 @@ setup:
   - match:      { hits.hits.3._source.name: "zoolander.jpg" }
   - length:     { hits.hits.3._source.nested: 3 }
   - exists:       hits.hits.3._source.nested.0.vector
+  - length:      { hits.hits.3._source.nested.0.vector: 3 }
   - match:      { hits.hits.3._source.nested.0.paragraph_id: 0 }
+
+---
+"Bulk partial update with synthetic vectors":
+  - do:
+      headers:
+        # Force JSON content type so that we use a parser that interprets the embeddings as doubles
+        Content-Type: application/json
+      bulk:
+        index: test
+        _source: true
+        body:
+          - '{"update": {"_id": "4"}}'
+          - >
+            {
+              "doc": {
+                "name": "zoolander2.jpg",
+                "vector": [1, 2, 4]
+              }
+            }
+
+  - match:      { items.0.update.get._source.vector: [1, 2, 4] }
+  - exists:       items.0.update.get._source.nested
+  - length:     { items.0.update.get._source.nested: 3}
+  - exists:       items.0.update.get._source.nested.0.vector
+  - match:      { items.0.update.get._source.nested.0.paragraph_id: 0 }
+  - length:     { items.0.update.get._source.nested.0.vector: 3 }
+  - not_exists:   items.0.update.get._source.nested.1.vector
+  - match:      { items.0.update.get._source.nested.1.paragraph_id: 1 }
+  - exists:       items.0.update.get._source.nested.2.vector
+  - length:     { items.0.update.get._source.nested.2.vector: 3 }
+  - match:      { items.0.update.get._source.nested.2.paragraph_id: 2 }
+  - set:        { items.0.update.get._source.nested: original_nested }
+
+  - do:
+      headers:
+        # Force JSON content type so that we use a parser that interprets the embeddings as doubles
+        Content-Type: application/json
+      get:
+        _source_exclude_vectors: false
+        index: test
+        id: "4"
+
+  - match: { _source.vector: [1.0, 2.0, 4.0] }
+  - match: { _source.name: zoolander2.jpg }
+  - match: { _source.nested: $original_nested }
+
+  - do:
+      indices.refresh: {}
+
+  - do:
+      headers:
+        # Force JSON content type so that we use a parser that interprets the embeddings as doubles
+        Content-Type: application/json
+      search:
+        index: test
+        body:
+          _source:
+            "exclude_vectors": false
+          query:
+            term:
+              _id: 4
+
+  - match: { hits.total.value: 1 }
+  - match: { hits.total.relation: eq }
+  - match: { hits.hits.0._source.name: zoolander2.jpg }
+  - match: { hits.hits.0._source.nested: $original_nested }
+
+---
+"Partial update with synthetic vectors":
+  - do:
+      headers:
+        # Force JSON content type so that we use a parser that interprets the embeddings as doubles
+        Content-Type: application/json
+      update:
+        index: test
+        id: "4"
+        body:
+          _source: true
+          doc: {
+            "name": "zoolander3.jpg",
+            "vector": [6, 8, 9]
+          }
+
+  - match:      { get._source.vector: [6, 8, 9] }
+  - exists:       get._source.nested
+  - length:     { get._source.nested: 3}
+  - exists:       get._source.nested.0.vector
+  - match:      { get._source.nested.0.paragraph_id: 0 }
+  - length:     { get._source.nested.0.vector: 3 }
+  - not_exists:   get._source.nested.1.vector
+  - match:      { get._source.nested.1.paragraph_id: 1 }
+  - exists:       get._source.nested.2.vector
+  - length:     { get._source.nested.2.vector: 3 }
+  - match:      { get._source.nested.2.paragraph_id: 2 }
+  - set:        { get._source.nested: original_nested }
+
+  - do:
+      headers:
+        # Force JSON content type so that we use a parser that interprets the embeddings as doubles
+        Content-Type: application/json
+      get:
+        _source_exclude_vectors: false
+        index: test
+        id: "4"
+
+  - match: { _source.vector: [6.0, 8.0, 9.0] }
+  - match: { _source.name: zoolander3.jpg }
+  - match: { _source.nested: $original_nested }
+
+  - do:
+      indices.refresh: {}
+
+  - do:
+      headers:
+        # Force JSON content type so that we use a parser that interprets the embeddings as doubles
+        Content-Type: application/json
+      search:
+        index: test
+        body:
+          _source:
+            "exclude_vectors": false
+          query:
+            term:
+              _id: 4
+
+  - match: { hits.total.value: 1 }
+  - match: { hits.total.relation: eq }
+  - match: { hits.hits.0._source.name: zoolander3.jpg }
+  - match: { hits.hits.0._source.nested: $original_nested }
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java
@@ -157,8 +157,10 @@ public static FetchSourceContext parseFromRestRequest(RestRequest request) {
             sourceExcludes = Strings.splitStringByCommaToArray(sExcludes);
         }
 
-        if (fetchSource != null || sourceIncludes != null || sourceExcludes != null) {
-            return FetchSourceContext.of(fetchSource == null || fetchSource, sourceIncludes, sourceExcludes);
+        Boolean excludeVectors = request.paramAsBoolean("_source_exclude_vectors", null);
+
+        if (excludeVectors != null || fetchSource != null || sourceIncludes != null || sourceExcludes != null) {
+            return FetchSourceContext.of(fetchSource == null || fetchSource, excludeVectors, sourceIncludes, sourceExcludes);
         }
         return null;
     }

Original file line number	Diff line number	Diff line change
`@@ -157,8 +157,10 @@ public static FetchSourceContext parseFromRestRequest(RestRequest request) {`
`157`	`157`	`sourceExcludes = Strings.splitStringByCommaToArray(sExcludes);`
`158`	`158`	`}`
`159`	`159`
`160`		`- if (fetchSource != null || sourceIncludes != null || sourceExcludes != null) {`
`161`		`- return FetchSourceContext.of(fetchSource == null || fetchSource, sourceIncludes, sourceExcludes);`
	`160`	`+ Boolean excludeVectors = request.paramAsBoolean("_source_exclude_vectors", null);`
	`161`	`+`
	`162`	`+ if (excludeVectors != null || fetchSource != null || sourceIncludes != null || sourceExcludes != null) {`
	`163`	`+ return FetchSourceContext.of(fetchSource == null || fetchSource, excludeVectors, sourceIncludes, sourceExcludes);`
`162`	`164`	`}`
`163`	`165`	`return null;`
`164`	`166`	`}`