Skip to content

Commit 1d64c86

Browse files
committed
add yaml tests with the fields option and patch the vectors as list to match the xcontent parsing
1 parent 52d9278 commit 1d64c86

File tree

4 files changed

+93
-20
lines changed

4 files changed

+93
-20
lines changed

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_vectors.yml

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,73 @@ setup:
150150
- length: { hits.hits.3._source.nested.0.vector: 3 }
151151
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
152152

153+
- do:
154+
search:
155+
index: test
156+
body:
157+
_source:
158+
exclude_vectors: false
159+
includes: nested.vector
160+
sort: ["name"]
161+
162+
- match: { hits.hits.0._id: "1"}
163+
- length: { hits.hits.0._source: 0}
164+
165+
- match: { hits.hits.1._id: "2"}
166+
# Hit 1 (doc "2") is the hit under test in this group; check its filtered _source, not hit 3's.
- length: { hits.hits.1._source: 1 }
167+
- length: { hits.hits.1._source.nested: 3 }
168+
- exists: hits.hits.1._source.nested.0.vector
169+
- not_exists: hits.hits.1._source.nested.0.paragraph_id
170+
- exists: hits.hits.1._source.nested.1.vector
171+
- not_exists: hits.hits.1._source.nested.1.paragraph_id
172+
- exists: hits.hits.1._source.nested.2.vector
173+
- not_exists: hits.hits.1._source.nested.2.paragraph_id
174+
175+
- match: { hits.hits.2._id: "3" }
176+
- length: { hits.hits.2._source: 0}
177+
178+
- match: { hits.hits.3._id: "4" }
179+
- length: { hits.hits.3._source: 1 }
180+
- length: { hits.hits.3._source.nested: 2 }
181+
- exists: hits.hits.3._source.nested.0.vector
182+
- length: { hits.hits.3._source.nested.0.vector: 3 }
183+
- not_exists: hits.hits.3._source.nested.0.paragraph_id
184+
- exists: hits.hits.3._source.nested.1.vector
185+
- length: { hits.hits.3._source.nested.1.vector: 3 }
186+
- not_exists: hits.hits.3._source.nested.1.paragraph_id
187+
188+
- do:
189+
headers:
190+
# Force JSON content type so that we use a parser that interprets the vectors as doubles
191+
Content-Type: application/json
192+
search:
193+
index: test
194+
body:
195+
_source:
196+
exclude_vectors: true
197+
sort: ["name"]
198+
fields: ["vector"]
199+
200+
- match: { hits.hits.0._id: "1"}
201+
- match: { hits.hits.0._source.name: "cow.jpg"}
202+
- not_exists: hits.hits.0._source.vector
203+
- match: { hits.hits.0.fields.vector: [1.0, 2.0, 3.0]}
204+
205+
- match: { hits.hits.1._id: "2"}
206+
- match: { hits.hits.1._source.name: "moose.jpg"}
207+
- length: { hits.hits.1._source.nested: 3 }
208+
- not_exists: hits.hits.1._source.nested.0.vector
209+
210+
- match: { hits.hits.2._id: "3" }
211+
- match: { hits.hits.2._source.name: "rabbit.jpg" }
212+
- match: { hits.hits.2.fields.vector: [10.0, 11.0, 12.0]}
213+
214+
- match: { hits.hits.3._id: "4" }
215+
- match: { hits.hits.3._source.name: "zoolander.jpg" }
216+
- length: { hits.hits.3._source.nested: 3 }
217+
- not_exists: hits.hits.3._source.nested.0.vector
218+
219+
153220
---
154221
"Bulk partial update with synthetic vectors":
155222
- do:
@@ -220,7 +287,7 @@ setup:
220287
"Partial update with synthetic vectors":
221288
- do:
222289
headers:
223-
# Force JSON content type so that we use a parser that interprets the embeddings as doubles
290+
# Force JSON content type so that we use a parser that interprets the vectors as doubles
224291
Content-Type: application/json
225292
update:
226293
index: test
@@ -247,7 +314,7 @@ setup:
247314

248315
- do:
249316
headers:
250-
# Force JSON content type so that we use a parser that interprets the embeddings as doubles
317+
# Force JSON content type so that we use a parser that interprets the vectors as doubles
251318
Content-Type: application/json
252319
get:
253320
_source_exclude_vectors: false
@@ -263,7 +330,7 @@ setup:
263330

264331
- do:
265332
headers:
266-
# Force JSON content type so that we use a parser that interprets the embeddings as doubles
333+
# Force JSON content type so that we use a parser that interprets the vectors as doubles
267334
Content-Type: application/json
268335
search:
269336
index: test

server/src/main/java/org/elasticsearch/index/get/ShardGetService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ public static Tuple<FetchSourceContext, SourceFilter> maybeExcludeSyntheticVecto
477477
fetchSourceContext.includes(),
478478
lateExcludes.toArray(String[]::new)
479479
);
480-
return Tuple.tuple(newFetchSourceContext, excludes.isEmpty() ? null : sourceFilter);
480+
return Tuple.tuple(newFetchSourceContext, sourceFilter);
481481
}
482482
return Tuple.tuple(fetchSourceContext, sourceFilter);
483483
}

server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
1919
import org.elasticsearch.search.lookup.Source;
2020
import org.elasticsearch.search.lookup.SourceFilter;
21-
import org.elasticsearch.search.vectors.VectorData;
2221
import org.elasticsearch.xcontent.XContentBuilder;
2322
import org.elasticsearch.xcontent.json.JsonXContent;
2423

@@ -477,7 +476,7 @@ private static void applyPatches(String rootPath, Map<String, Object> map, List<
477476
for (SyntheticVectorPatch patch : patches) {
478477
if (patch instanceof LeafSyntheticVectorPath leaf) {
479478
String key = extractRelativePath(rootPath, leaf.fullPath());
480-
map.put(key, leaf.value().isFloat() ? leaf.value().floatVector() : leaf.value().byteVector());
479+
map.put(key, leaf.value());
481480
} else if (patch instanceof NestedSyntheticVectorPath nested) {
482481
String nestedPath = extractRelativePath(rootPath, nested.fullPath());
483482
List<Map<?, ?>> nestedMaps = XContentMapValues.extractNestedSources(nestedPath, map);
@@ -528,7 +527,7 @@ record NestedOffsetSyntheticVectorPath(int offset, List<SyntheticVectorPatch> ch
528527
* @param fullPath the fully-qualified field name
529528
* @param value the value to assign
530529
*/
531-
record LeafSyntheticVectorPath(String fullPath, VectorData value) implements SyntheticVectorPatch {}
530+
record LeafSyntheticVectorPath(String fullPath, Object value) implements SyntheticVectorPatch {}
532531

533532
interface SyntheticVectorsLoader {
534533
/**

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@
101101
import java.nio.ByteBuffer;
102102
import java.nio.ByteOrder;
103103
import java.time.ZoneId;
104+
import java.util.ArrayList;
104105
import java.util.Arrays;
105106
import java.util.HexFormat;
106107
import java.util.List;
@@ -3151,32 +3152,35 @@ public void write(XContentBuilder b) throws IOException {
31513152
}
31523153

31533154
/**
3154-
* Returns a deep-copied vector for the current document, either as a float array
3155-
* (with optional cosine normalization) or a byte array.
3155+
* Returns a deep-copied vector for the current document, either as a list of floats
3156+
* (multiplied by the stored magnitude when one is present) or a list of bytes.
31563157
*
3157-
* @return the {@link VectorData} instance representing the current vector
3158-
* @throws IOException if vector data is not available or reading fails
3158+
* @return the vector as a {@code List<Float>} or a {@code List<Byte>}
* @throws IOException if reading fails
31593159
*/
3160-
public VectorData copyVector() throws IOException {
3160+
public Object copyVectorAsList() throws IOException {
31613161
assert hasValue : "vector is null for ord=" + ord;
31623162
if (floatValues != null) {
31633163
float[] raw = floatValues.vectorValue(ord);
3164-
float[] copy = new float[raw.length];
3164+
List<Float> copyList = new ArrayList<>(raw.length);
31653165

31663166
if (hasMagnitude) {
31673167
float mag = Float.intBitsToFloat((int) magnitudeReader.longValue());
31683168
for (int i = 0; i < raw.length; i++) {
3169-
copy[i] = raw[i] * mag;
3169+
copyList.add(raw[i] * mag);
31703170
}
31713171
} else {
3172-
System.arraycopy(raw, 0, copy, 0, raw.length);
3172+
for (int i = 0; i < raw.length; i++) {
3173+
copyList.add(raw[i]);
3174+
}
31733175
}
3174-
return VectorData.fromFloats(copy);
3176+
return copyList;
31753177
} else if (byteValues != null) {
31763178
byte[] raw = byteValues.vectorValue(ord);
3177-
byte[] copy = new byte[raw.length];
3178-
System.arraycopy(raw, 0, copy, 0, raw.length);
3179-
return VectorData.fromBytes(copy);
3179+
List<Byte> copyList = new ArrayList<>(raw.length);
3180+
for (int i = 0; i < raw.length; i++) {
3181+
copyList.add(raw[i]);
3182+
}
3183+
return copyList;
31803184
}
31813185

31823186
throw new IllegalStateException("No vector values available to copy.");
@@ -3259,7 +3263,10 @@ public SourceLoader.SyntheticVectorsLoader.Leaf leaf(LeafReaderContext context)
32593263
}
32603264
dvLoader.advanceToDoc(doc);
32613265
if (syntheticFieldLoader.hasValue()) {
3262-
acc.add(new SourceLoader.LeafSyntheticVectorPath(syntheticFieldLoader.fieldName(), syntheticFieldLoader.copyVector()));
3266+
// add vectors as list since that's how they're parsed from xcontent.
3267+
acc.add(
3268+
new SourceLoader.LeafSyntheticVectorPath(syntheticFieldLoader.fieldName(), syntheticFieldLoader.copyVectorAsList())
3269+
);
32633270
}
32643271
};
32653272
}

0 commit comments

Comments
 (0)