Skip to content

Commit 3652c36

Browse files
committed
addressing PR comments
1 parent 38cac73 commit 3652c36

File tree

2 files changed

+9
-73
lines changed

2 files changed

+9
-73
lines changed

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 6 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -815,15 +815,6 @@ VectorData parseBase64EncodedVector(String s, IntBooleanConsumer dimChecker, Vec
815815
return parseStringValue(s, dimChecker, similarity, Base64.getDecoder()::decode);
816816
}
817817

818-
VectorData parseBase64BinaryEncodedVector(byte[] binaryValue, IntBooleanConsumer dimChecker, VectorSimilarity similarity) {
819-
byte[] decodedVector = Base64.getDecoder().decode(binaryValue);
820-
dimChecker.accept(decodedVector.length, true);
821-
VectorData vectorData = VectorData.fromBytes(decodedVector);
822-
double squaredMagnitude = computeSquaredMagnitude(vectorData);
823-
checkVectorMagnitude(similarity, errorElementsAppender(decodedVector), (float) squaredMagnitude);
824-
return vectorData;
825-
}
826-
827818
@Override
828819
public VectorData parseKnnVector(
829820
DocumentParserContext context,
@@ -834,7 +825,6 @@ public VectorData parseKnnVector(
834825
XContentParser.Token token = context.parser().currentToken();
835826
return switch (token) {
836827
case START_ARRAY -> parseVectorArray(context, dims, dimChecker, similarity);
837-
case VALUE_EMBEDDED_OBJECT -> parseBase64BinaryEncodedVector(context.parser().binaryValue(), dimChecker, similarity);
838828
case VALUE_STRING -> {
839829
String s = context.parser().text();
840830
if (s.length() == dims * 2) {
@@ -886,7 +876,6 @@ static boolean isMaybeHexString(String s) {
886876
public int parseDimensionCount(DocumentParserContext context) throws IOException {
887877
XContentParser.Token currentToken = context.parser().currentToken();
888878
return switch (currentToken) {
889-
case VALUE_EMBEDDED_OBJECT -> Base64.getDecoder().decode(context.parser().binaryValue()).length;
890879
case START_ARRAY -> {
891880
int index = 0;
892881
for (Token token = context.parser().nextToken(); token != Token.END_ARRAY; token = context.parser().nextToken()) {
@@ -896,8 +885,8 @@ public int parseDimensionCount(DocumentParserContext context) throws IOException
896885
}
897886
case VALUE_STRING -> {
898887
String v = context.parser().text();
899-
// Base64 is always divisible by 4, so if it's not, assume hex
900-
if (v.length() % 4 != 0 || isMaybeHexString(v)) {
888+
// Padded Base64 always has a length divisible by 4, so if it's not, try hex
889+
if (v.length() % 4 != 0) {
901890
try {
902891
yield HexFormat.of().parseHex(v).length;
903892
} catch (IllegalArgumentException e) {
@@ -1046,20 +1035,6 @@ public int parseDimensionCount(DocumentParserContext context) throws IOException
10461035
}
10471036
yield index;
10481037
}
1049-
case VALUE_EMBEDDED_OBJECT -> {
1050-
byte[] vector = Base64.getDecoder().decode(context.parser().binaryValue());
1051-
if (vector.length % Float.BYTES != 0) {
1052-
throw new ParsingException(
1053-
context.parser().getTokenLocation(),
1054-
"Failed to parse object: Embedded vector byte length ["
1055-
+ vector.length
1056-
+ "] is not a multiple of ["
1057-
+ Float.BYTES
1058-
+ "]"
1059-
);
1060-
}
1061-
yield vector.length / Float.BYTES;
1062-
}
10631038
case VALUE_STRING -> {
10641039
byte[] decodedVectorBytes = Base64.getDecoder().decode(context.parser().text());
10651040
if (decodedVectorBytes.length % Float.BYTES != 0) {
@@ -1138,7 +1113,6 @@ VectorDataAndMagnitude parseFloatVectorInput(DocumentParserContext context, int
11381113
XContentParser.Token token = context.parser().currentToken();
11391114
return switch (token) {
11401115
case START_ARRAY -> parseVectorArray(context, dimChecker, dims);
1141-
case VALUE_EMBEDDED_OBJECT -> parseBase64BinaryEncodedVector(context, dimChecker, dims);
11421116
case VALUE_STRING -> parseBase64EncodedVector(context, dimChecker, dims);
11431117
default -> throw new ParsingException(
11441118
context.parser().getTokenLocation(),
@@ -1163,31 +1137,6 @@ VectorDataAndMagnitude parseVectorArray(DocumentParserContext context, IntBoolea
11631137
return new VectorDataAndMagnitude(VectorData.fromFloats(vector), squaredMagnitude);
11641138
}
11651139

1166-
VectorDataAndMagnitude parseBase64BinaryEncodedVector(DocumentParserContext context, IntBooleanConsumer dimChecker, int dims)
1167-
throws IOException {
1168-
// BIG_ENDIAN is the default, but just being explicit here
1169-
byte[] binaryValue = context.parser().binaryValue();
1170-
ByteBuffer byteBuffer = ByteBuffer.wrap(Base64.getDecoder().decode(binaryValue)).order(ByteOrder.BIG_ENDIAN);
1171-
if (byteBuffer.remaining() != dims * Float.BYTES) {
1172-
throw new ParsingException(
1173-
context.parser().getTokenLocation(),
1174-
"Failed to parse object: Embedded vector byte length ["
1175-
+ byteBuffer.remaining()
1176-
+ "] does not match the expected length of ["
1177-
+ (dims * Float.BYTES)
1178-
+ "] for dimension count ["
1179-
+ dims
1180-
+ "]"
1181-
);
1182-
}
1183-
float[] decodedVector = new float[dims];
1184-
byteBuffer.asFloatBuffer().get(decodedVector);
1185-
dimChecker.accept(decodedVector.length, true);
1186-
VectorData vectorData = VectorData.fromFloats(decodedVector);
1187-
float squaredMagnitude = (float) computeSquaredMagnitude(vectorData);
1188-
return new VectorDataAndMagnitude(vectorData, squaredMagnitude);
1189-
}
1190-
11911140
VectorDataAndMagnitude parseBase64EncodedVector(DocumentParserContext context, IntBooleanConsumer dimChecker, int dims)
11921141
throws IOException {
11931142
// BIG_ENDIAN is the default, but just being explicit here
@@ -1296,13 +1245,6 @@ VectorData parseStringValue(
12961245
return VectorData.fromBytes(decodedVector);
12971246
}
12981247

1299-
@Override
1300-
VectorData parseBase64BinaryEncodedVector(byte[] binaryValue, IntBooleanConsumer dimChecker, VectorSimilarity similarity) {
1301-
byte[] decodedVector = Base64.getDecoder().decode(binaryValue);
1302-
dimChecker.accept(decodedVector.length * Byte.SIZE, true);
1303-
return VectorData.fromBytes(decodedVector);
1304-
}
1305-
13061248
@Override
13071249
public int getNumBytes(int dimensions) {
13081250
assert dimensions % Byte.SIZE == 0;
@@ -2514,16 +2456,10 @@ public List<Object> fetchValues(Source source, int doc, List<Object> ignoredValu
25142456
if (sourceValue == null) {
25152457
return List.of();
25162458
}
2517-
try {
2518-
switch (sourceValue) {
2519-
case List<?> v -> values.addAll(v);
2520-
case String s -> values.add(s);
2521-
default -> ignoredValues.add(sourceValue);
2522-
}
2523-
} catch (Exception e) {
2524-
// if parsing fails here then it would have failed at index time
2525-
// as well, meaning that we must be ignoring malformed values.
2526-
ignoredValues.add(sourceValue);
2459+
switch (sourceValue) {
2460+
case List<?> v -> values.addAll(v);
2461+
case String s -> values.add(s);
2462+
default -> ignoredValues.add(sourceValue);
25272463
}
25282464
}
25292465
values.trimToSize();

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/DenseVectorFieldTypeIT.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ public DenseVectorFieldTypeIT(
9191
@Name("elementType") ElementType elementType,
9292
@Name("similarity") DenseVectorFieldMapper.VectorSimilarity similarity,
9393
@Name("index") boolean index,
94-
@Name("synthetic") VectorSourceOptions sourceOptions
94+
@Name("sourceOptions") VectorSourceOptions sourceOptions
9595
) {
9696
this.elementType = elementType;
9797
this.similarity = similarity;
@@ -248,14 +248,14 @@ public void setup() throws IOException {
248248
}
249249
final ByteBuffer buffer = ByteBuffer.allocate(Float.BYTES * numDims);
250250
buffer.asFloatBuffer().put(array);
251-
yield Base64.getEncoder().encode(buffer.array());
251+
yield Base64.getEncoder().encodeToString(buffer.array());
252252
}
253253
case BYTE, BIT -> {
254254
byte[] array = new byte[numDims];
255255
for (int k = 0; k < numDims; k++) {
256256
array[k] = vector.get(k).byteValue();
257257
}
258-
yield randomBoolean() ? Base64.getEncoder().encode(array) : HexFormat.of().formatHex(array);
258+
yield randomBoolean() ? Base64.getEncoder().encodeToString(array) : HexFormat.of().formatHex(array);
259259
}
260260
};
261261
}

0 commit comments

Comments
 (0)