Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.xcontent.provider;

import org.elasticsearch.xcontent.Text;

import java.io.IOException;

/**
 * Marker contract for a {@link com.fasterxml.jackson.core.JsonParser} that can hand out the
 * UTF-8 encoded bytes backing the current string token.
 * Callers can use this to skip the round trip through {@link String} when all they need
 * is the encoded form of the value.
 */
public interface OptimizedTextCapable {

    /**
     * Attempts to expose the UTF-8 encoded bytes of the current string token.
     * The attempt is best-effort only: whenever the bytes cannot be retrieved, for whatever reason,
     * the result is {@code null}.
     *
     * @return the encoded bytes of the current string token, or {@code null} if they are not available
     * @throws IOException if reading the underlying input fails
     */
    Text getValueAsText() throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public static XContent cborXContent() {
}

static {
cborFactory = XContentImplUtils.configure(CBORFactory.builder());
cborFactory = XContentImplUtils.configure(ESCborFactory.builder());
cborFactory.configure(CBORFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false); // this trips on many mappings now...
// Do not automatically close unclosed objects/arrays in com.fasterxml.jackson.dataformat.cbor.CBORGenerator#close() method
cborFactory.configure(JsonGenerator.Feature.AUTO_CLOSE_JSON_CONTENT, false);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.xcontent.provider.cbor;

import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
import com.fasterxml.jackson.dataformat.cbor.CBORParser;

/**
 * {@link CBORFactory} whose byte-array backed parsers are {@link ESCborParser} instances.
 */
public class ESCborFactory extends CBORFactory {

    /**
     * @return a new {@link ESCborFactoryBuilder}
     */
    public static ESCborFactoryBuilder builder() {
        return new ESCborFactoryBuilder();
    }

    /**
     * Builds an {@link ESCborParser} that reads {@code len} bytes of {@code data} starting at {@code offset}.
     * The parser is given no input stream ({@code null}) and its buffer is flagged as not recyclable.
     */
    @Override
    protected CBORParser _createParser(byte[] data, int offset, int len, IOContext ctxt) {
        final int end = offset + len;
        final ByteQuadsCanonicalizer symbols = _byteSymbolCanonicalizer.makeChildOrPlaceholder(_factoryFeatures);
        return new ESCborParser(
            ctxt,
            _parserFeatures,
            _formatParserFeatures,
            _objectCodec,
            symbols,
            null,
            data,
            offset,
            end,
            false
        );
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.xcontent.provider.cbor;

import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
import com.fasterxml.jackson.dataformat.cbor.CBORFactoryBuilder;

/**
 * {@link CBORFactoryBuilder} whose {@link #build()} produces an {@link ESCborFactory}.
 */
public class ESCborFactoryBuilder extends CBORFactoryBuilder {

    /**
     * Creates the factory through the no-argument {@link ESCborFactory} constructor.
     *
     * @return a new {@link ESCborFactory}
     */
    @Override
    public CBORFactory build() {
        // NOTE(review): this builder is not handed to the factory, so anything configured on the
        // builder before build() is not applied here - confirm that this is intended.
        final CBORFactory factory = new ESCborFactory();
        return factory;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.xcontent.provider.cbor;

import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.ObjectCodec;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
import com.fasterxml.jackson.dataformat.cbor.CBORConstants;
import com.fasterxml.jackson.dataformat.cbor.CBORParser;

import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.XContentString;
import org.elasticsearch.xcontent.provider.OptimizedTextCapable;

import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;

/**
 * {@link CBORParser} that can additionally expose the raw UTF-8 bytes of the current string token
 * as a {@link Text}, see {@link #getValueAsText()}.
 */
public class ESCborParser extends CBORParser implements OptimizedTextCapable {

    /** Value of the low 5 bits of the type byte that marks an indefinite-length (chunked) item. */
    private static final int INDEFINITE_LENGTH_MARKER = 31;

    public ESCborParser(
        IOContext ctxt,
        int parserFeatures,
        int cborFeatures,
        ObjectCodec codec,
        ByteQuadsCanonicalizer sym,
        InputStream in,
        byte[] inputBuffer,
        int start,
        int end,
        boolean bufferRecyclable
    ) {
        super(ctxt, parserFeatures, cborFeatures, codec, sym, in, inputBuffer, start, end, bufferRecyclable);
    }

    /**
     * Tries to return the UTF-8 bytes of the current string token without decoding them.
     * <p>
     * The attempt is only made while the current token is a {@link JsonToken#VALUE_STRING} that is
     * still flagged as incomplete. A successful call consumes the value from the input and marks
     * the token as complete, so a second call for the same token returns {@code null}.
     *
     * @return the bytes of the current string value, or {@code null} if the current token is not an
     *         incomplete string or the string is encoded with an indefinite (chunked) length
     * @throws IOException if the length is malformed or the input ends before the value does
     */
    @Override
    public Text getValueAsText() throws IOException {
        if (_tokenIncomplete && _currToken == JsonToken.VALUE_STRING) {
            return _finishAndReturnText(_typeByte);
        }
        return null;
    }

    /**
     * Reads the text value introduced by the type byte {@code ch} and returns its raw bytes.
     * <p>
     * Parser state is only modified once it is known that the value can be served: for a chunked
     * string this method returns {@code null} with the token still flagged as incomplete and with
     * no input consumed, leaving the token untouched for whatever fallback the caller uses.
     *
     * @param ch the type byte of the current token (major type in the top 3 bits, length info in the low 5)
     * @return the raw bytes of the value, or {@code null} for an indefinite-length string
     */
    private Text _finishAndReturnText(int ch) throws IOException {
        final int type = ((ch >> 5) & 0x7);
        final int lowBits = ch & 0x1F;

        // sanity check, done before any state is touched
        if (type != CBORConstants.MAJOR_TYPE_TEXT) {
            // should never happen so
            _throwInternal();
        }
        if (lowBits == INDEFINITE_LENGTH_MARKER) {
            // optimized text is not supported for chunked strings; bail out before mutating anything
            return null;
        }

        // From here on input is consumed, so the token is treated as finished.
        _tokenIncomplete = false;
        _sharedString = null;

        // String value, decode
        final int len = _decodeExplicitLength(lowBits);
        if (len < 0) {
            // Only a 32-bit length with the top bit set gets here. Its bytes are already consumed,
            // so it cannot be reported as "not available"; reject it like an oversized 64-bit length.
            throw _constructError("Illegal length for " + currentToken() + ": " + Integer.toUnsignedLong(len));
        }
        if (len == 0) {
            return new Text(new XContentString.UTF8Bytes(new byte[0], 0, 0), 0);
        }

        final int available = _inputEnd - _inputPtr;
        if (available >= len) {
            // Fast path: the whole value is buffered, so reference the buffer directly without copying.
            final Text text = new Text(new XContentString.UTF8Bytes(_inputBuffer, _inputPtr, len));
            _inputPtr += len;
            return text;
        }

        // Slow path: the value runs past the buffered input, so gather it into its own array,
        // copying whatever is buffered in bulk and loading more input whenever the buffer runs dry.
        final byte[] bytes = new byte[len];
        int copied = 0;
        while (copied < len) {
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            final int chunk = Math.min(len - copied, _inputEnd - _inputPtr);
            System.arraycopy(_inputBuffer, _inputPtr, bytes, copied, chunk);
            _inputPtr += chunk;
            copied += chunk;
        }
        return new Text(new XContentString.UTF8Bytes(bytes, 0, len));
    }

    /**
     * Method used to decode explicit length of a variable-length value
     * (or, for indefinite/chunked, indicate that one is not known).
     * Note that long (64-bit) length is only allowed if it fits in
     * 32-bit signed int, for now; expectation being that longer values
     * are always encoded as chunks.
     *
     * @param lowBits the low 5 bits of the type byte
     * @return the decoded length, or {@code -1} for the indefinite-length marker
     */
    private int _decodeExplicitLength(int lowBits) throws IOException {
        // common case, indefinite length; relies on marker
        if (lowBits == INDEFINITE_LENGTH_MARKER) {
            return -1;
        }
        // values up to 23 are the length itself
        if (lowBits <= 23) {
            return lowBits;
        }
        // 24..27 select a 1, 2, 4 or 8 byte length that follows the type byte
        switch (lowBits - 24) {
            case 0:
                return _decode8Bits();
            case 1:
                return _decode16Bits();
            case 2:
                return _decode32Bits();
            case 3:
                long l = _decode64Bits();
                if (l < 0 || l > MAX_INT_L) {
                    throw _constructError("Illegal length for " + currentToken() + ": " + l);
                }
                return (int) l;
        }
        throw _constructError(
            String.format(
                Locale.ROOT,
                "Invalid 5-bit length indicator for `JsonToken.%s`: 0x%02X; only 0x00-0x17, 0x1F allowed",
                currentToken(),
                lowBits
            )
        );
    }

    /** Reads one byte as an unsigned value, loading more input first if the buffer is exhausted. */
    private int _decode8Bits() throws IOException {
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        return _inputBuffer[_inputPtr++] & 0xFF;
    }

    /** Reads a big-endian unsigned 16-bit value; uses {@link #_slow16()} when it may cross the buffer end. */
    private int _decode16Bits() throws IOException {
        int ptr = _inputPtr;
        if ((ptr + 1) >= _inputEnd) {
            return _slow16();
        }
        final byte[] b = _inputBuffer;
        int v = ((b[ptr] & 0xFF) << 8) + (b[ptr + 1] & 0xFF);
        _inputPtr = ptr + 2;
        return v;
    }

    /** Byte-at-a-time variant of {@link #_decode16Bits()} that checks the buffer before every byte. */
    private int _slow16() throws IOException {
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        int v = (_inputBuffer[_inputPtr++] & 0xFF);
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        return (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
    }

    /**
     * Reads a big-endian 32-bit value; uses {@link #_slow32()} when it may cross the buffer end.
     * The result is a signed {@code int}, so it is negative when the top bit of the first byte is set.
     */
    private int _decode32Bits() throws IOException {
        int ptr = _inputPtr;
        if ((ptr + 3) >= _inputEnd) {
            return _slow32();
        }
        final byte[] b = _inputBuffer;
        int v = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
        _inputPtr = ptr;
        return v;
    }

    /** Byte-at-a-time variant of {@link #_decode32Bits()} that checks the buffer before every byte. */
    private int _slow32() throws IOException {
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        int v = _inputBuffer[_inputPtr++]; // sign will disappear anyway
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        v = (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        v = (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        return (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
    }

    /** Reads a big-endian 64-bit value; uses {@link #_slow64()} when it may cross the buffer end. */
    private long _decode64Bits() throws IOException {
        int ptr = _inputPtr;
        if ((ptr + 7) >= _inputEnd) {
            return _slow64();
        }
        final byte[] b = _inputBuffer;
        int i1 = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
        int i2 = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
        _inputPtr = ptr;
        return _long(i1, i2);
    }

    /** Assembles a 64-bit value from two consecutive 32-bit reads, high word first. */
    private long _slow64() throws IOException {
        return _long(_decode32Bits(), _decode32Bits());
    }

    /** Combines {@code i1} (high 32 bits) and {@code i2} (low 32 bits, taken as unsigned) into a {@code long}. */
    private static long _long(int i1, int i2) {
        long l1 = i1;
        long l2 = i2;
        l2 = (l2 << 32) >>> 32;
        return (l1 << 32) + l2;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@

import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.XContentString;
import org.elasticsearch.xcontent.provider.OptimizedTextCapable;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser {
public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser implements OptimizedTextCapable {
protected int stringEnd = -1;
protected int stringLength;

Expand All @@ -49,6 +50,7 @@ public ESUTF8StreamJsonParser(
* Method that will try to get underlying UTF-8 encoded bytes of the current string token.
* This is only a best-effort attempt; if there is some reason the bytes cannot be retrieved, this method will return null.
*/
@Override
public Text getValueAsText() throws IOException {
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) {
if (stringEnd > 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.XContentString;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.provider.OptimizedTextCapable;
import org.elasticsearch.xcontent.provider.XContentParserConfigurationImpl;
import org.elasticsearch.xcontent.support.AbstractXContentParser;

Expand Down Expand Up @@ -146,8 +147,8 @@ public XContentString optimizedText() throws IOException {
if (currentToken().isValue() == false) {
throwOnNoText();
}
if (parser instanceof ESUTF8StreamJsonParser esParser) {
var bytesRef = esParser.getValueAsText();
if (parser instanceof OptimizedTextCapable optimizedTextCapableParser) {
var bytesRef = optimizedTextCapableParser.getValueAsText();
if (bytesRef != null) {
return bytesRef;
}
Expand Down
Loading