Skip to content

Commit 9899881

Browse files
committed
Enable optimizedText for CBOR
1 parent 5142d3d commit 9899881

File tree

8 files changed

+357
-4
lines changed

8 files changed

+357
-4
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.xcontent.provider;
11+
12+
import org.elasticsearch.xcontent.Text;
13+
14+
import java.io.IOException;
15+
16+
/**
17+
* Indicates that a {@link com.fasterxml.jackson.core.JsonParser} is capable of
18+
* returning the underlying UTF-8 encoded bytes of the current string token.
19+
* This is useful for performance optimizations, as it allows the parser to
20+
* avoid unnecessary conversions to and from strings.
21+
*/
22+
public interface OptimizedTextCapable {
23+
24+
/**
25+
* Method that will try to get underlying UTF-8 encoded bytes of the current string token.
26+
* This is only a best-effort attempt; if there is some reason the bytes cannot be retrieved, this method will return null.
27+
*/
28+
Text getValueAsText() throws IOException;
29+
}

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/cbor/CborXContentImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public static XContent cborXContent() {
4747
}
4848

4949
static {
50-
cborFactory = XContentImplUtils.configure(CBORFactory.builder());
50+
cborFactory = XContentImplUtils.configure(ESCborFactory.builder());
5151
cborFactory.configure(CBORFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false); // this trips on many mappings now...
5252
// Do not automatically close unclosed objects/arrays in com.fasterxml.jackson.dataformat.cbor.CBORGenerator#close() method
5353
cborFactory.configure(JsonGenerator.Feature.AUTO_CLOSE_JSON_CONTENT, false);
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.xcontent.provider.cbor;
11+
12+
import com.fasterxml.jackson.core.io.IOContext;
13+
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
14+
import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
15+
import com.fasterxml.jackson.dataformat.cbor.CBORParser;
16+
17+
public class ESCborFactory extends CBORFactory {
18+
19+
@Override
20+
protected CBORParser _createParser(byte[] data, int offset, int len, IOContext ctxt) {
21+
ByteQuadsCanonicalizer can = _byteSymbolCanonicalizer.makeChildOrPlaceholder(_factoryFeatures);
22+
return new ESCborParser(ctxt, _parserFeatures, _formatParserFeatures, _objectCodec, can, null, data, offset, offset + len, false);
23+
}
24+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.xcontent.provider.cbor;
11+
12+
import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
13+
import com.fasterxml.jackson.dataformat.cbor.CBORFactoryBuilder;
14+
15+
public class ESCborFactoryBuilder extends CBORFactoryBuilder {
16+
17+
@Override
18+
public CBORFactory build() {
19+
return new ESCborFactory();
20+
}
21+
}
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.xcontent.provider.cbor;
11+
12+
import com.fasterxml.jackson.core.JsonToken;
13+
import com.fasterxml.jackson.core.ObjectCodec;
14+
import com.fasterxml.jackson.core.io.IOContext;
15+
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
16+
import com.fasterxml.jackson.dataformat.cbor.CBORConstants;
17+
import com.fasterxml.jackson.dataformat.cbor.CBORParser;
18+
19+
import org.elasticsearch.xcontent.Text;
20+
import org.elasticsearch.xcontent.XContentString;
21+
import org.elasticsearch.xcontent.provider.OptimizedTextCapable;
22+
23+
import java.io.IOException;
24+
import java.io.InputStream;
25+
26+
public class ESCborParser extends CBORParser implements OptimizedTextCapable {
27+
public ESCborParser(
28+
IOContext ctxt,
29+
int parserFeatures,
30+
int cborFeatures,
31+
ObjectCodec codec,
32+
ByteQuadsCanonicalizer sym,
33+
InputStream in,
34+
byte[] inputBuffer,
35+
int start,
36+
int end,
37+
boolean bufferRecyclable
38+
) {
39+
super(ctxt, parserFeatures, cborFeatures, codec, sym, in, inputBuffer, start, end, bufferRecyclable);
40+
}
41+
42+
@Override
43+
public Text getValueAsText() throws IOException {
44+
JsonToken t = _currToken;
45+
if (_tokenIncomplete) {
46+
if (t == JsonToken.VALUE_STRING) {
47+
return _finishAndReturnText(_typeByte);
48+
}
49+
}
50+
return null;
51+
}
52+
53+
private Text _finishAndReturnText(int ch) throws IOException {
54+
_tokenIncomplete = false;
55+
_sharedString = null;
56+
final int type = ((ch >> 5) & 0x7);
57+
ch &= 0x1F;
58+
59+
// sanity check
60+
if (type != CBORConstants.MAJOR_TYPE_TEXT) {
61+
// should never happen so
62+
_throwInternal();
63+
}
64+
65+
// String value, decode
66+
final int len = _decodeExplicitLength(ch);
67+
if (len == 0) {
68+
return new Text(new XContentString.UTF8Bytes(new byte[0], 0, 0), 0);
69+
}
70+
if (len < 0) {
71+
// optimized text is not supported for chunked strings
72+
return null;
73+
}
74+
final int available = _inputEnd - _inputPtr;
75+
if (available >= len) {
76+
Text text = new Text(new XContentString.UTF8Bytes(_inputBuffer, _inputPtr, len));
77+
_inputPtr += len;
78+
return text;
79+
}
80+
byte[] bytes = new byte[len];
81+
for (int i = 0; i < len; i++) {
82+
bytes[i] = _nextByte();
83+
}
84+
return new Text(new XContentString.UTF8Bytes(bytes, 0, len));
85+
}
86+
87+
private byte _nextByte() throws IOException {
88+
int inPtr = _inputPtr;
89+
if (inPtr < _inputEnd) {
90+
byte b = _inputBuffer[inPtr];
91+
_inputPtr = inPtr + 1;
92+
return b;
93+
}
94+
loadMoreGuaranteed();
95+
return _inputBuffer[_inputPtr++];
96+
}
97+
98+
/**
99+
* Method used to decode explicit length of a variable-length value
100+
* (or, for indefinite/chunked, indicate that one is not known).
101+
* Note that long (64-bit) length is only allowed if it fits in
102+
* 32-bit signed int, for now; expectation being that longer values
103+
* are always encoded as chunks.
104+
*/
105+
private int _decodeExplicitLength(int lowBits) throws IOException {
106+
// common case, indefinite length; relies on marker
107+
if (lowBits == 31) {
108+
return -1;
109+
}
110+
if (lowBits <= 23) {
111+
return lowBits;
112+
}
113+
switch (lowBits - 24) {
114+
case 0:
115+
return _decode8Bits();
116+
case 1:
117+
return _decode16Bits();
118+
case 2:
119+
return _decode32Bits();
120+
case 3:
121+
long l = _decode64Bits();
122+
if (l < 0 || l > MAX_INT_L) {
123+
throw _constructError("Illegal length for " + currentToken() + ": " + l);
124+
}
125+
return (int) l;
126+
}
127+
throw _constructError(
128+
String.format(
129+
"Invalid 5-bit length indicator for `JsonToken.%s`: 0x%02X; only 0x00-0x17, 0x1F allowed",
130+
currentToken(),
131+
lowBits
132+
)
133+
);
134+
}
135+
136+
private int _decode8Bits() throws IOException {
137+
if (_inputPtr >= _inputEnd) {
138+
loadMoreGuaranteed();
139+
}
140+
return _inputBuffer[_inputPtr++] & 0xFF;
141+
}
142+
143+
private int _decode16Bits() throws IOException {
144+
int ptr = _inputPtr;
145+
if ((ptr + 1) >= _inputEnd) {
146+
return _slow16();
147+
}
148+
final byte[] b = _inputBuffer;
149+
int v = ((b[ptr] & 0xFF) << 8) + (b[ptr + 1] & 0xFF);
150+
_inputPtr = ptr + 2;
151+
return v;
152+
}
153+
154+
private int _slow16() throws IOException {
155+
if (_inputPtr >= _inputEnd) {
156+
loadMoreGuaranteed();
157+
}
158+
int v = (_inputBuffer[_inputPtr++] & 0xFF);
159+
if (_inputPtr >= _inputEnd) {
160+
loadMoreGuaranteed();
161+
}
162+
return (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
163+
}
164+
165+
private int _decode32Bits() throws IOException {
166+
int ptr = _inputPtr;
167+
if ((ptr + 3) >= _inputEnd) {
168+
return _slow32();
169+
}
170+
final byte[] b = _inputBuffer;
171+
int v = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
172+
_inputPtr = ptr;
173+
return v;
174+
}
175+
176+
private int _slow32() throws IOException {
177+
if (_inputPtr >= _inputEnd) {
178+
loadMoreGuaranteed();
179+
}
180+
int v = _inputBuffer[_inputPtr++]; // sign will disappear anyway
181+
if (_inputPtr >= _inputEnd) {
182+
loadMoreGuaranteed();
183+
}
184+
v = (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
185+
if (_inputPtr >= _inputEnd) {
186+
loadMoreGuaranteed();
187+
}
188+
v = (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
189+
if (_inputPtr >= _inputEnd) {
190+
loadMoreGuaranteed();
191+
}
192+
return (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
193+
}
194+
195+
private long _decode64Bits() throws IOException {
196+
int ptr = _inputPtr;
197+
if ((ptr + 7) >= _inputEnd) {
198+
return _slow64();
199+
}
200+
final byte[] b = _inputBuffer;
201+
int i1 = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
202+
int i2 = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
203+
_inputPtr = ptr;
204+
return _long(i1, i2);
205+
}
206+
207+
private long _slow64() throws IOException {
208+
return _long(_decode32Bits(), _decode32Bits());
209+
}
210+
211+
private static long _long(int i1, int i2) {
212+
long l1 = i1;
213+
long l2 = i2;
214+
l2 = (l2 << 32) >>> 32;
215+
return (l1 << 32) + l2;
216+
}
217+
}

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,14 @@
1818

1919
import org.elasticsearch.xcontent.Text;
2020
import org.elasticsearch.xcontent.XContentString;
21+
import org.elasticsearch.xcontent.provider.OptimizedTextCapable;
2122

2223
import java.io.IOException;
2324
import java.io.InputStream;
2425
import java.util.ArrayList;
2526
import java.util.List;
2627

27-
public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser {
28+
public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser implements OptimizedTextCapable {
2829
protected int stringEnd = -1;
2930
protected int stringLength;
3031

@@ -49,6 +50,7 @@ public ESUTF8StreamJsonParser(
4950
* Method that will try to get underlying UTF-8 encoded bytes of the current string token.
5051
* This is only a best-effort attempt; if there is some reason the bytes cannot be retrieved, this method will return null.
5152
*/
53+
@Override
5254
public Text getValueAsText() throws IOException {
5355
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) {
5456
if (stringEnd > 0) {

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.elasticsearch.xcontent.XContentParserConfiguration;
2727
import org.elasticsearch.xcontent.XContentString;
2828
import org.elasticsearch.xcontent.XContentType;
29+
import org.elasticsearch.xcontent.provider.OptimizedTextCapable;
2930
import org.elasticsearch.xcontent.provider.XContentParserConfigurationImpl;
3031
import org.elasticsearch.xcontent.support.AbstractXContentParser;
3132

@@ -146,8 +147,8 @@ public XContentString optimizedText() throws IOException {
146147
if (currentToken().isValue() == false) {
147148
throwOnNoText();
148149
}
149-
if (parser instanceof ESUTF8StreamJsonParser esParser) {
150-
var bytesRef = esParser.getValueAsText();
150+
if (parser instanceof OptimizedTextCapable optimizedTextCapableParser) {
151+
var bytesRef = optimizedTextCapableParser.getValueAsText();
151152
if (bytesRef != null) {
152153
return bytesRef;
153154
}

0 commit comments

Comments
 (0)