elastic · felixbarny · Aug 8, 2025 · Aug 7, 2025 · Aug 7, 2025 · Aug 8, 2025
@@ -0,0 +1,29 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.xcontent.provider;
+
+import org.elasticsearch.xcontent.Text;
+
+import java.io.IOException;
+
+/**
+ * Indicates that a {@link com.fasterxml.jackson.core.JsonParser} is capable of
+ * returning the underlying UTF-8 encoded bytes of the current string token.
+ * This is useful for performance optimizations, as it allows the parser to
+ * avoid unnecessary conversions to and from strings.
+ * <p>
+ * Callers are expected to test a parser for this interface and to fall back to
+ * regular text access whenever {@link #getValueAsText()} returns {@code null}.
+ */
+public interface OptimizedTextCapable {
+
+    /**
+     * Method that will try to get underlying UTF-8 encoded bytes of the current string token.
+     * This is only a best-effort attempt; if there is some reason the bytes cannot be retrieved, this method will return null.
+     *
+     * @return the UTF-8 bytes of the current string token wrapped in a {@link Text}, or {@code null} if they cannot be provided
+     * @throws IOException if the implementation fails while reading the token's bytes
+     */
+    Text getValueAsText() throws IOException;
+}
@@ -47,7 +47,7 @@ public static XContent cborXContent() {
     }
 
     static {
-        cborFactory = XContentImplUtils.configure(CBORFactory.builder());
+        cborFactory = XContentImplUtils.configure(ESCborFactory.builder());
         cborFactory.configure(CBORFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false); // this trips on many mappings now...
         // Do not automatically close unclosed objects/arrays in com.fasterxml.jackson.dataformat.cbor.CBORGenerator#close() method
         cborFactory.configure(JsonGenerator.Feature.AUTO_CLOSE_JSON_CONTENT, false);

@@ -0,0 +1,28 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.xcontent.provider.cbor;
+
+import com.fasterxml.jackson.core.io.IOContext;
+import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
+import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
+import com.fasterxml.jackson.dataformat.cbor.CBORParser;
+
+/**
+ * {@link CBORFactory} variant whose byte-array parsers are {@link ESCborParser} instances.
+ * <p>
+ * Only the {@code byte[]} overload of {@code _createParser} is overridden, so parsers created from any
+ * other kind of input are whatever {@link CBORFactory} produces for them.
+ */
+public class ESCborFactory extends CBORFactory {
+
+    /**
+     * Creates a factory with default settings.
+     */
+    public ESCborFactory() {
+        super();
+    }
+
+    /**
+     * Creates a factory that takes over the settings collected by the given builder.
+     *
+     * @param builder the builder whose configuration is applied to the new factory
+     */
+    protected ESCborFactory(com.fasterxml.jackson.dataformat.cbor.CBORFactoryBuilder builder) {
+        super(builder);
+    }
+
+    /**
+     * Returns a builder for {@link ESCborFactory} instances.
+     * <p>
+     * {@code build()} is overridden on the returned builder so that the builder itself is handed to the factory
+     * constructor; building through the no-argument constructor would drop everything configured on the builder.
+     *
+     * @return a new builder whose {@code build()} yields a configured {@link ESCborFactory}
+     */
+    public static ESCborFactoryBuilder builder() {
+        return new ESCborFactoryBuilder() {
+            @Override
+            public CBORFactory build() {
+                return new ESCborFactory(this);
+            }
+        };
+    }
+
+    /**
+     * Creates an {@link ESCborParser} that reads directly from the given array slice.
+     *
+     * @param data   array holding the CBOR content
+     * @param offset index of the first content byte in {@code data}
+     * @param len    number of content bytes
+     * @param ctxt   I/O context passed through to the parser
+     * @return a parser over {@code data[offset, offset + len)} with no backing input stream
+     */
+    @Override
+    protected CBORParser _createParser(byte[] data, int offset, int len, IOContext ctxt) {
+        final ByteQuadsCanonicalizer symbols = _byteSymbolCanonicalizer.makeChildOrPlaceholder(_factoryFeatures);
+        final int end = offset + len;
+        // no input stream (null) and a caller-owned buffer, hence not recyclable (false)
+        return new ESCborParser(ctxt, _parserFeatures, _formatParserFeatures, _objectCodec, symbols, null, data, offset, end, false);
+    }
+}
@@ -0,0 +1,21 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.xcontent.provider.cbor;
+
+import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
+import com.fasterxml.jackson.dataformat.cbor.CBORFactoryBuilder;
+
+/**
+ * {@link CBORFactoryBuilder} whose {@link #build()} produces an {@link ESCborFactory} rather than a plain {@link CBORFactory}.
+ */
+public class ESCborFactoryBuilder extends CBORFactoryBuilder {
+
+    /**
+     * Creates a new {@link ESCborFactory}.
+     * <p>
+     * NOTE(review): the factory is instantiated with its no-argument constructor, so nothing that was configured
+     * on this builder is passed on to it -- confirm whether callers rely on builder settings reaching the factory.
+     *
+     * @return a new {@link ESCborFactory}
+     */
+    @Override
+    public CBORFactory build() {
+        return new ESCborFactory();
+    }
+}
@@ -0,0 +1,219 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.xcontent.provider.cbor;
+
+import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.core.ObjectCodec;
+import com.fasterxml.jackson.core.io.IOContext;
+import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
+import com.fasterxml.jackson.dataformat.cbor.CBORConstants;
+import com.fasterxml.jackson.dataformat.cbor.CBORParser;
+
+import org.elasticsearch.xcontent.Text;
+import org.elasticsearch.xcontent.XContentString;
+import org.elasticsearch.xcontent.provider.OptimizedTextCapable;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Locale;
+
+/**
+ * {@link CBORParser} that can expose the raw UTF-8 bytes of the current string value through
+ * {@link #getValueAsText()} instead of decoding them into a {@link String}.
+ * <p>
+ * The private {@code _decode*} helpers read the big-endian length field that follows a CBOR type byte
+ * directly from the input buffer, refilling the buffer when it runs out.
+ */
+public class ESCborParser extends CBORParser implements OptimizedTextCapable {
+
+    /** Value of the low 5 bits of a type byte that marks an indefinite-length (chunked) item. */
+    private static final int INDEFINITE_LENGTH = 31;
+
+    /** Shared backing array for empty strings; a zero-length array cannot be modified, so sharing it is safe. */
+    private static final byte[] EMPTY_BYTES = new byte[0];
+
+    public ESCborParser(
+        IOContext ctxt,
+        int parserFeatures,
+        int cborFeatures,
+        ObjectCodec codec,
+        ByteQuadsCanonicalizer sym,
+        InputStream in,
+        byte[] inputBuffer,
+        int start,
+        int end,
+        boolean bufferRecyclable
+    ) {
+        super(ctxt, parserFeatures, cborFeatures, codec, sym, in, inputBuffer, start, end, bufferRecyclable);
+    }
+
+    /**
+     * Tries to return the UTF-8 bytes of the current string value without decoding them.
+     * <p>
+     * Returns {@code null}, leaving the parser state untouched, when the current token is not a string value,
+     * when its content has already been read, or when the string is encoded in chunks (indefinite length).
+     * <p>
+     * After a non-null return the token is marked as complete and its bytes have been consumed from the input
+     * without being copied into the parser's text buffer.
+     * NOTE(review): confirm that callers do not ask this parser for the text of the same token afterwards.
+     *
+     * @return the bytes of the current string value, or {@code null} if they cannot be provided
+     * @throws IOException if the length field is malformed or the input ends prematurely
+     */
+    @Override
+    public Text getValueAsText() throws IOException {
+        if (_tokenIncomplete == false || _currToken != JsonToken.VALUE_STRING) {
+            return null;
+        }
+        return _finishAndReturnText(_typeByte);
+    }
+
+    /**
+     * Reads the content of the pending text token described by the type byte {@code ch}.
+     *
+     * @param ch the type byte of the current token (major type in the upper 3 bits, length indicator in the lower 5)
+     * @return the token's bytes, or {@code null} for a chunked string, in which case nothing has been consumed
+     * @throws IOException if the length is invalid or the input ends before the announced number of bytes
+     */
+    private Text _finishAndReturnText(int ch) throws IOException {
+        final int type = ((ch >> 5) & 0x7);
+        final int lowBits = ch & 0x1F;
+
+        // sanity check
+        if (type != CBORConstants.MAJOR_TYPE_TEXT) {
+            // should never happen so
+            _throwInternal();
+        }
+
+        if (lowBits == INDEFINITE_LENGTH) {
+            // Optimized text is not supported for chunked strings. Bail out before changing any state: no input
+            // has been consumed and the token is still flagged as incomplete, so it can be read the regular way.
+            return null;
+        }
+
+        // from here on the token's bytes are consumed by this method
+        _tokenIncomplete = false;
+        _sharedString = null;
+
+        // String value, decode
+        final int len = _decodeExplicitLength(lowBits);
+        if (len < 0) {
+            // only reachable through a 32-bit length with the high bit set; the length bytes are already
+            // consumed, so there is no way to fall back to the regular text path
+            throw _constructError("Illegal length for " + currentToken() + ": " + Integer.toUnsignedLong(len));
+        }
+        if (len == 0) {
+            return new Text(new XContentString.UTF8Bytes(EMPTY_BYTES, 0, 0), 0);
+        }
+        final int available = _inputEnd - _inputPtr;
+        if (available >= len) {
+            // fast path: the whole string is buffered, so the result is a view onto the input buffer (no copy)
+            Text text = new Text(new XContentString.UTF8Bytes(_inputBuffer, _inputPtr, len));
+            _inputPtr += len;
+            return text;
+        }
+        // slow path: the string spans buffer refills, copy it piecewise into its own array
+        // NOTE(review): len is taken from the input and the array is allocated before the bytes are known to exist
+        final byte[] bytes = new byte[len];
+        int copied = 0;
+        while (copied < len) {
+            if (_inputPtr >= _inputEnd) {
+                loadMoreGuaranteed();
+            }
+            final int chunk = Math.min(len - copied, _inputEnd - _inputPtr);
+            System.arraycopy(_inputBuffer, _inputPtr, bytes, copied, chunk);
+            _inputPtr += chunk;
+            copied += chunk;
+        }
+        return new Text(new XContentString.UTF8Bytes(bytes, 0, len));
+    }
+
+    /**
+     * Method used to decode explicit length of a variable-length value
+     * (or, for indefinite/chunked, indicate that one is not known).
+     * Note that long (64-bit) length is only allowed if it fits in
+     * 32-bit signed int, for now; expectation being that longer values
+     * are always encoded as chunks.
+     *
+     * @param lowBits the low 5 bits of the type byte
+     * @return the decoded length, or {@code -1} for the indefinite-length marker; a 32-bit length with the
+     *         high bit set comes back as a negative value other than what a valid length can be
+     * @throws IOException if the indicator is not a valid length encoding or a 64-bit length does not fit an int
+     */
+    private int _decodeExplicitLength(int lowBits) throws IOException {
+        // common case, indefinite length; relies on marker
+        if (lowBits == INDEFINITE_LENGTH) {
+            return -1;
+        }
+        // lengths up to 23 are stored in the indicator itself
+        if (lowBits <= 23) {
+            return lowBits;
+        }
+        // indicators 24..27 announce a 1-, 2-, 4- or 8-byte length that follows the type byte
+        switch (lowBits - 24) {
+            case 0:
+                return _decode8Bits();
+            case 1:
+                return _decode16Bits();
+            case 2:
+                return _decode32Bits();
+            case 3:
+                long l = _decode64Bits();
+                if (l < 0 || l > MAX_INT_L) {
+                    throw _constructError("Illegal length for " + currentToken() + ": " + l);
+                }
+                return (int) l;
+        }
+        throw _constructError(
+            String.format(
+                Locale.ROOT,
+                "Invalid 5-bit length indicator for `JsonToken.%s`: 0x%02X; only 0x00-0x17, 0x1F allowed",
+                currentToken(),
+                lowBits
+            )
+        );
+    }
+
+    /** Reads one byte as an unsigned value, refilling the buffer first if it is exhausted. */
+    private int _decode8Bits() throws IOException {
+        if (_inputPtr >= _inputEnd) {
+            loadMoreGuaranteed();
+        }
+        return _inputBuffer[_inputPtr++] & 0xFF;
+    }
+
+    /** Reads an unsigned big-endian 16-bit value; defers to {@link #_slow16()} unless both bytes are known to be buffered. */
+    private int _decode16Bits() throws IOException {
+        int ptr = _inputPtr;
+        if ((ptr + 1) >= _inputEnd) {
+            return _slow16();
+        }
+        final byte[] b = _inputBuffer;
+        int v = ((b[ptr] & 0xFF) << 8) + (b[ptr + 1] & 0xFF);
+        _inputPtr = ptr + 2;
+        return v;
+    }
+
+    /** Byte-by-byte variant of {@link #_decode16Bits()} that refills the buffer before each byte if needed. */
+    private int _slow16() throws IOException {
+        if (_inputPtr >= _inputEnd) {
+            loadMoreGuaranteed();
+        }
+        int v = (_inputBuffer[_inputPtr++] & 0xFF);
+        if (_inputPtr >= _inputEnd) {
+            loadMoreGuaranteed();
+        }
+        return (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
+    }
+
+    /**
+     * Reads a big-endian 32-bit value; defers to {@link #_slow32()} unless all four bytes are known to be buffered.
+     * The result is negative when the high bit of the first byte is set.
+     */
+    private int _decode32Bits() throws IOException {
+        int ptr = _inputPtr;
+        if ((ptr + 3) >= _inputEnd) {
+            return _slow32();
+        }
+        final byte[] b = _inputBuffer;
+        int v = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
+        _inputPtr = ptr;
+        return v;
+    }
+
+    /** Byte-by-byte variant of {@link #_decode32Bits()} that refills the buffer before each byte if needed. */
+    private int _slow32() throws IOException {
+        if (_inputPtr >= _inputEnd) {
+            loadMoreGuaranteed();
+        }
+        int v = _inputBuffer[_inputPtr++]; // sign will disappear anyway
+        if (_inputPtr >= _inputEnd) {
+            loadMoreGuaranteed();
+        }
+        v = (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
+        if (_inputPtr >= _inputEnd) {
+            loadMoreGuaranteed();
+        }
+        v = (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
+        if (_inputPtr >= _inputEnd) {
+            loadMoreGuaranteed();
+        }
+        return (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
+    }
+
+    /** Reads a big-endian 64-bit value; defers to {@link #_slow64()} unless all eight bytes are known to be buffered. */
+    private long _decode64Bits() throws IOException {
+        int ptr = _inputPtr;
+        if ((ptr + 7) >= _inputEnd) {
+            return _slow64();
+        }
+        final byte[] b = _inputBuffer;
+        int i1 = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
+        int i2 = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
+        _inputPtr = ptr;
+        return _long(i1, i2);
+    }
+
+    /** Reads a 64-bit value as two consecutive 32-bit reads, high word first. */
+    private long _slow64() throws IOException {
+        // arguments are evaluated left to right, so the first read supplies the high word
+        return _long(_decode32Bits(), _decode32Bits());
+    }
+
+    /**
+     * Combines two 32-bit halves into a 64-bit value.
+     *
+     * @param i1 the high 32 bits
+     * @param i2 the low 32 bits, interpreted as unsigned
+     */
+    private static long _long(int i1, int i2) {
+        final long high = i1;
+        final long low = i2 & 0xFFFFFFFFL;
+        return (high << 32) + low;
+    }
+}
@@ -18,13 +18,14 @@
 
 import org.elasticsearch.xcontent.Text;
 import org.elasticsearch.xcontent.XContentString;
+import org.elasticsearch.xcontent.provider.OptimizedTextCapable;
 
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.List;
 
-public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser {
+public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser implements OptimizedTextCapable {
     protected int stringEnd = -1;
     protected int stringLength;
 
@@ -49,6 +50,7 @@ public ESUTF8StreamJsonParser(
      * Method that will try to get underlying UTF-8 encoded bytes of the current string token.
      * This is only a best-effort attempt; if there is some reason the bytes cannot be retrieved, this method will return null.
      */
+    @Override
     public Text getValueAsText() throws IOException {
         if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) {
             if (stringEnd > 0) {

@@ -26,6 +26,7 @@
 import org.elasticsearch.xcontent.XContentParserConfiguration;
 import org.elasticsearch.xcontent.XContentString;
 import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xcontent.provider.OptimizedTextCapable;
 import org.elasticsearch.xcontent.provider.XContentParserConfigurationImpl;
 import org.elasticsearch.xcontent.support.AbstractXContentParser;
 
@@ -146,8 +147,8 @@ public XContentString optimizedText() throws IOException {
         if (currentToken().isValue() == false) {
             throwOnNoText();
         }
-        if (parser instanceof ESUTF8StreamJsonParser esParser) {
-            var bytesRef = esParser.getValueAsText();
+        if (parser instanceof OptimizedTextCapable optimizedTextCapableParser) {
+            var bytesRef = optimizedTextCapableParser.getValueAsText();
             if (bytesRef != null) {
                 return bytesRef;
             }