@@ -808,11 +808,20 @@ VectorData parseStringValue(
808808 }
809809
810810 VectorData parseHexEncodedVector (String s , IntBooleanConsumer dimChecker , VectorSimilarity similarity ) {
811- return parseStringValue (s , dimChecker , similarity , str -> HexFormat .of (). parseHex ( str ) );
811+ return parseStringValue (s , dimChecker , similarity , HexFormat .of ():: parseHex );
812812 }
813813
814814 VectorData parseBase64EncodedVector (String s , IntBooleanConsumer dimChecker , VectorSimilarity similarity ) {
815- return parseStringValue (s , dimChecker , similarity , str -> Base64 .getDecoder ().decode (str ));
815+ return parseStringValue (s , dimChecker , similarity , Base64 .getDecoder ()::decode );
816+ }
817+
818+ VectorData parseBase64BinaryEncodedVector (byte [] binaryValue , IntBooleanConsumer dimChecker , VectorSimilarity similarity ) {
819+ byte [] decodedVector = Base64 .getDecoder ().decode (binaryValue );
820+ dimChecker .accept (decodedVector .length , true );
821+ VectorData vectorData = VectorData .fromBytes (decodedVector );
822+ double squaredMagnitude = computeSquaredMagnitude (vectorData );
823+ checkVectorMagnitude (similarity , errorElementsAppender (decodedVector ), (float ) squaredMagnitude );
824+ return vectorData ;
816825 }
817826
818827 @ Override
@@ -825,9 +834,10 @@ public VectorData parseKnnVector(
825834 XContentParser .Token token = context .parser ().currentToken ();
826835 return switch (token ) {
827836 case START_ARRAY -> parseVectorArray (context , dims , dimChecker , similarity );
837+ case VALUE_EMBEDDED_OBJECT -> parseBase64BinaryEncodedVector (context .parser ().binaryValue (), dimChecker , similarity );
828838 case VALUE_STRING -> {
829839 String s = context .parser ().text ();
830- if (s .length () == dims * 2 && isMaybeHexString ( s ) ) {
840+ if (s .length () == dims * 2 ) {
831841 try {
832842 yield parseHexEncodedVector (s , dimChecker , similarity );
833843 } catch (IllegalArgumentException e ) {
@@ -865,8 +875,7 @@ static boolean isMaybeHexString(String s) {
865875 }
866876 for (int i = 0 ; i < len ; i ++) {
867877 char c = s .charAt (i );
868- boolean isHexChar = (c >= '0' && c <= '9' ) || (c >= 'A' && c <= 'F' ) || (c >= 'a' && c <= 'f' );
869- if (isHexChar == false ) {
878+ if (HexFormat .isHexDigit (c ) == false ) {
870879 return false ;
871880 }
872881 }
@@ -877,6 +886,7 @@ static boolean isMaybeHexString(String s) {
877886 public int parseDimensionCount (DocumentParserContext context ) throws IOException {
878887 XContentParser .Token currentToken = context .parser ().currentToken ();
879888 return switch (currentToken ) {
889+ case VALUE_EMBEDDED_OBJECT -> Base64 .getDecoder ().decode (context .parser ().binaryValue ()).length ;
880890 case START_ARRAY -> {
881891 int index = 0 ;
882892 for (Token token = context .parser ().nextToken (); token != Token .END_ARRAY ; token = context .parser ().nextToken ()) {
@@ -1028,13 +1038,28 @@ public double computeSquaredMagnitude(VectorData vectorData) {
10281038 public int parseDimensionCount (DocumentParserContext context ) throws IOException {
10291039 XContentParser .Token currentToken = context .parser ().currentToken ();
10301040 return switch (currentToken ) {
1041+
10311042 case START_ARRAY -> {
10321043 int index = 0 ;
10331044 for (Token token = context .parser ().nextToken (); token != Token .END_ARRAY ; token = context .parser ().nextToken ()) {
10341045 index ++;
10351046 }
10361047 yield index ;
10371048 }
1049+ case VALUE_EMBEDDED_OBJECT -> {
1050+ byte [] vector = Base64 .getDecoder ().decode (context .parser ().binaryValue ());
1051+ if (vector .length % Float .BYTES != 0 ) {
1052+ throw new ParsingException (
1053+ context .parser ().getTokenLocation (),
1054+ "Failed to parse object: Embedded vector byte length ["
1055+ + vector .length
1056+ + "] is not a multiple of ["
1057+ + Float .BYTES
1058+ + "]"
1059+ );
1060+ }
1061+ yield vector .length / Float .BYTES ;
1062+ }
10381063 case VALUE_STRING -> {
10391064 byte [] decodedVectorBytes = Base64 .getDecoder ().decode (context .parser ().text ());
10401065 if (decodedVectorBytes .length % Float .BYTES != 0 ) {
@@ -1113,6 +1138,7 @@ VectorDataAndMagnitude parseFloatVectorInput(DocumentParserContext context, int
11131138 XContentParser .Token token = context .parser ().currentToken ();
11141139 return switch (token ) {
11151140 case START_ARRAY -> parseVectorArray (context , dimChecker , dims );
1141+ case VALUE_EMBEDDED_OBJECT -> parseBase64BinaryEncodedVector (context , dimChecker , dims );
11161142 case VALUE_STRING -> parseBase64EncodedVector (context , dimChecker , dims );
11171143 default -> throw new ParsingException (
11181144 context .parser ().getTokenLocation (),
@@ -1137,8 +1163,34 @@ VectorDataAndMagnitude parseVectorArray(DocumentParserContext context, IntBoolea
11371163 return new VectorDataAndMagnitude (VectorData .fromFloats (vector ), squaredMagnitude );
11381164 }
11391165
1166+ VectorDataAndMagnitude parseBase64BinaryEncodedVector (DocumentParserContext context , IntBooleanConsumer dimChecker , int dims )
1167+ throws IOException {
1168+ // BIG_ENDIAN is the default, but just being explicit here
1169+ byte [] binaryValue = context .parser ().binaryValue ();
1170+ ByteBuffer byteBuffer = ByteBuffer .wrap (Base64 .getDecoder ().decode (binaryValue )).order (ByteOrder .BIG_ENDIAN );
1171+ if (byteBuffer .remaining () != dims * Float .BYTES ) {
1172+ throw new ParsingException (
1173+ context .parser ().getTokenLocation (),
1174+ "Failed to parse object: Embedded vector byte length ["
1175+ + byteBuffer .remaining ()
1176+ + "] does not match the expected length of ["
1177+ + (dims * Float .BYTES )
1178+ + "] for dimension count ["
1179+ + dims
1180+ + "]"
1181+ );
1182+ }
1183+ float [] decodedVector = new float [dims ];
1184+ byteBuffer .asFloatBuffer ().get (decodedVector );
1185+ dimChecker .accept (decodedVector .length , true );
1186+ VectorData vectorData = VectorData .fromFloats (decodedVector );
1187+ float squaredMagnitude = (float ) computeSquaredMagnitude (vectorData );
1188+ return new VectorDataAndMagnitude (vectorData , squaredMagnitude );
1189+ }
1190+
11401191 VectorDataAndMagnitude parseBase64EncodedVector (DocumentParserContext context , IntBooleanConsumer dimChecker , int dims )
11411192 throws IOException {
1193+ // BIG_ENDIAN is the default, but just being explicit here
11421194 ByteBuffer byteBuffer = ByteBuffer .wrap (Base64 .getDecoder ().decode (context .parser ().text ())).order (ByteOrder .BIG_ENDIAN );
11431195 if (byteBuffer .remaining () != dims * Float .BYTES ) {
11441196 throw new ParsingException (
@@ -1244,6 +1296,13 @@ VectorData parseStringValue(
12441296 return VectorData .fromBytes (decodedVector );
12451297 }
12461298
1299+ @ Override
1300+ VectorData parseBase64BinaryEncodedVector (byte [] binaryValue , IntBooleanConsumer dimChecker , VectorSimilarity similarity ) {
1301+ byte [] decodedVector = Base64 .getDecoder ().decode (binaryValue );
1302+ dimChecker .accept (decodedVector .length * Byte .SIZE , true );
1303+ return VectorData .fromBytes (decodedVector );
1304+ }
1305+
12471306 @ Override
12481307 public int getNumBytes (int dimensions ) {
12491308 assert dimensions % Byte .SIZE == 0 ;
@@ -2456,12 +2515,10 @@ public List<Object> fetchValues(Source source, int doc, List<Object> ignoredValu
24562515 return List .of ();
24572516 }
24582517 try {
2459- if (sourceValue instanceof List <?> v ) {
2460- values .addAll (v );
2461- } else if (sourceValue instanceof String s ) {
2462- values .add (s );
2463- } else {
2464- ignoredValues .add (sourceValue );
2518+ switch (sourceValue ) {
2519+ case List <?> v -> values .addAll (v );
2520+ case String s -> values .add (s );
2521+ default -> ignoredValues .add (sourceValue );
24652522 }
24662523 } catch (Exception e ) {
24672524 // if parsing fails here then it would have failed at index time
@@ -2922,7 +2979,8 @@ public List<Object> fetchValues(Source source, int doc, List<Object> ignoredValu
29222979 values .add (NumberFieldMapper .NumberType .FLOAT .parse (o , false ));
29232980 }
29242981 } else if (sourceValue instanceof String s ) {
2925- if ((element == BYTE_ELEMENT || element == BIT_ELEMENT )
2982+ if ((element .elementType () == BYTE_ELEMENT .elementType ()
2983+ || element .elementType () == BIT_ELEMENT .elementType ())
29262984 && s .length () == dims * 2
29272985 && ByteElement .isMaybeHexString (s )) {
29282986 byte [] bytes ;
0 commit comments