Skip to content

Commit fcb713e

Browse files
[8.14] Correct how hex strings are handled when dynamically updating vector dims (#109423) (#109448)
* Correct how hex strings are handled when dynamically updating vector dims (#109423) closes: #109411 * adjusting version --------- Co-authored-by: Elastic Machine <[email protected]>
1 parent dc8513b commit fcb713e

File tree

3 files changed

+67
-0
lines changed

3 files changed

+67
-0
lines changed

docs/changelog/109423.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
pr: 109423
2+
summary: Correct how hex strings are handled when dynamically updating vector dims
3+
area: Vector Search
4+
type: bug
5+
issues: []

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/170_knn_search_hex_encoded_byte_vectors.yml

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -161,3 +161,43 @@ setup:
161161
- match: { hits.hits.0._id: "3" }
162162
- match: { hits.hits.1._id: "2" }
163163
- match: { hits.hits.2._id: "1" }
164+
---
165+
"Dynamic dimensions for hex-encoded string":
166+
- requires:
167+
cluster_features: "gte_v8.14.1"
168+
reason: 'hex encoding for byte vectors fixed in 8.14.1'
169+
170+
- do:
171+
indices.create:
172+
index: knn_hex_vector_index_dyn_dims
173+
body:
174+
settings:
175+
number_of_shards: 1
176+
mappings:
177+
properties:
178+
my_vector_byte:
179+
type: dense_vector
180+
index : false
181+
element_type: byte
182+
my_vector_byte_indexed:
183+
type: dense_vector
184+
index: true
185+
element_type: byte
186+
similarity : l2_norm
187+
188+
# [-128, 127, 10] - is encoded as '807f0a'
189+
- do:
190+
index:
191+
index: knn_hex_vector_index_dyn_dims
192+
id: "1"
193+
body:
194+
my_vector_byte: "807f0a"
195+
my_vector_byte_indexed: "807f0a"
196+
197+
# assert that indexing the hex-encoded document dynamically set dims to 3 in the mapping of both fields
198+
- do:
199+
indices.get_mapping:
200+
index: knn_hex_vector_index_dyn_dims
201+
202+
- match: { knn_hex_vector_index_dyn_dims.mappings.properties.my_vector_byte.dims: 3 }
203+
- match: { knn_hex_vector_index_dyn_dims.mappings.properties.my_vector_byte_indexed.dims: 3 }

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -457,6 +457,28 @@ int getNumBytes(int dimensions) {
457457
ByteBuffer createByteBuffer(IndexVersion indexVersion, int numBytes) {
458458
return ByteBuffer.wrap(new byte[numBytes]);
459459
}
460+
461+
@Override
462+
int parseDimensionCount(DocumentParserContext context) throws IOException {
463+
XContentParser.Token currentToken = context.parser().currentToken();
464+
return switch (currentToken) {
465+
case START_ARRAY -> {
466+
int index = 0;
467+
for (Token token = context.parser().nextToken(); token != Token.END_ARRAY; token = context.parser().nextToken()) {
468+
index++;
469+
}
470+
yield index;
471+
}
472+
case VALUE_STRING -> {
473+
byte[] decodedVector = HexFormat.of().parseHex(context.parser().text());
474+
yield decodedVector.length;
475+
}
476+
default -> throw new ParsingException(
477+
context.parser().getTokenLocation(),
478+
format("Unsupported type [%s] for provided value [%s]", currentToken, context.parser().text())
479+
);
480+
};
481+
}
460482
},
461483

462484
FLOAT(4) {

0 commit comments

Comments
 (0)