Skip to content

Commit 8e0ac6d

Browse files
authored
Enable optimizedText for CBOR (#132542)
1 parent 13c2353 commit 8e0ac6d

File tree

11 files changed

+402
-6
lines changed

11 files changed

+402
-6
lines changed

libs/x-content/impl/build.gradle

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,7 @@ tasks.named("thirdPartyAudit").configure {
5555
'com.fasterxml.jackson.databind.cfg.MapperBuilder'
5656
)
5757
}
58+
59+
tasks.named("licenseHeaders").configure {
60+
// Overrides the approvedLicenses list used by the licenseHeaders task for this project.
// NOTE(review): 'AGLP' looks like a transposition of 'AGPL' - confirm that this string matches the
// license family name registered by the licenseHeaders task before changing it.
approvedLicenses = ['Apache', 'AGLP+SSPL+Elastic License', 'Generated', 'Vendored']
61+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.xcontent.provider;
11+
12+
import org.elasticsearch.xcontent.Text;
13+
14+
import java.io.IOException;
15+
16+
/**
17+
* Indicates that a {@link com.fasterxml.jackson.core.JsonParser} is capable of
18+
* returning the underlying UTF-8 encoded bytes of the current string token.
19+
* This is useful for performance optimizations, as it allows the parser to
20+
* avoid unnecessary conversions to and from strings.
21+
*/
22+
public interface OptimizedTextCapable {
23+
24+
/**
25+
* Method that will try to get underlying UTF-8 encoded bytes of the current string token.
26+
* This is only a best-effort attempt; if there is some reason the bytes cannot be retrieved, this method will return null.
*
* @return the UTF-8 bytes of the current string token wrapped in a {@link Text}, or {@code null} when they
*         cannot be provided; callers are expected to check for {@code null} and fall back to another way of reading the value
* @throws IOException may be thrown by implementations that have to read from the underlying input to locate the value
27+
*/
28+
Text getValueAsText() throws IOException;
29+
}

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/cbor/CborXContentImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public static XContent cborXContent() {
4747
}
4848

4949
static {
50-
cborFactory = XContentImplUtils.configure(CBORFactory.builder());
50+
cborFactory = XContentImplUtils.configure(ESCborFactory.builder());
5151
cborFactory.configure(CBORFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false); // this trips on many mappings now...
5252
// Do not automatically close unclosed objects/arrays in com.fasterxml.jackson.dataformat.cbor.CBORGenerator#close() method
5353
cborFactory.configure(JsonGenerator.Feature.AUTO_CLOSE_JSON_CONTENT, false);
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.xcontent.provider.cbor;
11+
12+
import com.fasterxml.jackson.core.io.IOContext;
13+
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
14+
import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
15+
import com.fasterxml.jackson.dataformat.cbor.CBORParser;
16+
17+
public class ESCborFactory extends CBORFactory {
18+
19+
public static ESCborFactoryBuilder builder() {
20+
return new ESCborFactoryBuilder();
21+
}
22+
23+
@Override
24+
protected CBORParser _createParser(byte[] data, int offset, int len, IOContext ctxt) {
25+
ByteQuadsCanonicalizer can = _byteSymbolCanonicalizer.makeChildOrPlaceholder(_factoryFeatures);
26+
return new ESCborParser(ctxt, _parserFeatures, _formatParserFeatures, _objectCodec, can, null, data, offset, offset + len, false);
27+
}
28+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.xcontent.provider.cbor;
11+
12+
import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
13+
import com.fasterxml.jackson.dataformat.cbor.CBORFactoryBuilder;
14+
15+
/**
 * {@link CBORFactoryBuilder} whose {@link #build()} returns an {@link ESCborFactory}.
 */
public class ESCborFactoryBuilder extends CBORFactoryBuilder {
16+
17+
/**
 * Creates the factory using the no-argument {@link ESCborFactory} constructor.
 * NOTE(review): this builder is not handed to the factory, so settings applied to the builder before
 * calling build() do not appear to be transferred to the returned factory - confirm whether that is intended.
 */
@Override
18+
public CBORFactory build() {
19+
return new ESCborFactory();
20+
}
21+
}
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
/*
2+
* Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
3+
* under one or more license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*
19+
* This file is based on a modification of https://github.com/FasterXML/jackson-dataformats-binary which is licensed under the Apache 2.0 License.
20+
*/
21+
22+
package org.elasticsearch.xcontent.provider.cbor;
23+
24+
import com.fasterxml.jackson.core.JsonToken;
25+
import com.fasterxml.jackson.core.ObjectCodec;
26+
import com.fasterxml.jackson.core.io.IOContext;
27+
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
28+
import com.fasterxml.jackson.dataformat.cbor.CBORConstants;
29+
import com.fasterxml.jackson.dataformat.cbor.CBORParser;
30+
31+
import org.elasticsearch.xcontent.Text;
32+
import org.elasticsearch.xcontent.XContentString;
33+
import org.elasticsearch.xcontent.provider.OptimizedTextCapable;
34+
35+
import java.io.IOException;
36+
import java.io.InputStream;
37+
import java.util.Locale;
38+
39+
/**
40+
* Contains code adapted from {@link CBORParser} licensed under the Apache License 2.0.
41+
*/
42+
public class ESCborParser extends CBORParser implements OptimizedTextCapable {
43+
public ESCborParser(
44+
IOContext ctxt,
45+
int parserFeatures,
46+
int cborFeatures,
47+
ObjectCodec codec,
48+
ByteQuadsCanonicalizer sym,
49+
InputStream in,
50+
byte[] inputBuffer,
51+
int start,
52+
int end,
53+
boolean bufferRecyclable
54+
) {
55+
super(ctxt, parserFeatures, cborFeatures, codec, sym, in, inputBuffer, start, end, bufferRecyclable);
56+
}
57+
58+
@Override
59+
public Text getValueAsText() throws IOException {
60+
JsonToken t = _currToken;
61+
if (_tokenIncomplete) {
62+
if (t == JsonToken.VALUE_STRING) {
63+
return _finishAndReturnText(_typeByte);
64+
}
65+
}
66+
return null;
67+
}
68+
69+
private Text _finishAndReturnText(int ch) throws IOException {
70+
final int type = ((ch >> 5) & 0x7);
71+
ch &= 0x1F;
72+
73+
// sanity check
74+
if (type != CBORConstants.MAJOR_TYPE_TEXT) {
75+
// should never happen so
76+
_throwInternal();
77+
}
78+
int previousPointer = _inputPtr;
79+
80+
// String value, decode
81+
final int len = _decodeExplicitLength(ch);
82+
if (len == 0) {
83+
return new Text(new XContentString.UTF8Bytes(new byte[0], 0, 0), 0);
84+
}
85+
if (len < 0) {
86+
// optimized text is not supported for chunked strings
87+
return null;
88+
}
89+
final int available = _inputEnd - _inputPtr;
90+
if (available >= len) {
91+
Text text = new Text(new XContentString.UTF8Bytes(_inputBuffer, _inputPtr, len));
92+
_inputPtr = previousPointer;
93+
return text;
94+
}
95+
// this is expected to be used in the context where the input stream is not available
96+
return null;
97+
}
98+
99+
/**
100+
* Method used to decode explicit length of a variable-length value
101+
* (or, for indefinite/chunked, indicate that one is not known).
102+
* Note that long (64-bit) length is only allowed if it fits in
103+
* 32-bit signed int, for now; expectation being that longer values
104+
* are always encoded as chunks.
105+
*/
106+
private int _decodeExplicitLength(int lowBits) throws IOException {
107+
// common case, indefinite length; relies on marker
108+
if (lowBits == 31) {
109+
return -1;
110+
}
111+
if (lowBits <= 23) {
112+
return lowBits;
113+
}
114+
switch (lowBits - 24) {
115+
case 0:
116+
return _decode8Bits();
117+
case 1:
118+
return _decode16Bits();
119+
case 2:
120+
return _decode32Bits();
121+
case 3:
122+
long l = _decode64Bits();
123+
if (l < 0 || l > MAX_INT_L) {
124+
throw _constructError("Illegal length for " + currentToken() + ": " + l);
125+
}
126+
return (int) l;
127+
}
128+
throw _constructError(
129+
String.format(
130+
Locale.ROOT,
131+
"Invalid 5-bit length indicator for `JsonToken.%s`: 0x%02X; only 0x00-0x17, 0x1F allowed",
132+
currentToken(),
133+
lowBits
134+
)
135+
);
136+
}
137+
138+
private int _decode8Bits() throws IOException {
139+
if (_inputPtr >= _inputEnd) {
140+
loadMoreGuaranteed();
141+
}
142+
return _inputBuffer[_inputPtr++] & 0xFF;
143+
}
144+
145+
private int _decode16Bits() throws IOException {
146+
int ptr = _inputPtr;
147+
if ((ptr + 1) >= _inputEnd) {
148+
return _slow16();
149+
}
150+
final byte[] b = _inputBuffer;
151+
int v = ((b[ptr] & 0xFF) << 8) + (b[ptr + 1] & 0xFF);
152+
_inputPtr = ptr + 2;
153+
return v;
154+
}
155+
156+
private int _slow16() throws IOException {
157+
if (_inputPtr >= _inputEnd) {
158+
loadMoreGuaranteed();
159+
}
160+
int v = (_inputBuffer[_inputPtr++] & 0xFF);
161+
if (_inputPtr >= _inputEnd) {
162+
loadMoreGuaranteed();
163+
}
164+
return (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
165+
}
166+
167+
private int _decode32Bits() throws IOException {
168+
int ptr = _inputPtr;
169+
if ((ptr + 3) >= _inputEnd) {
170+
return _slow32();
171+
}
172+
final byte[] b = _inputBuffer;
173+
int v = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
174+
_inputPtr = ptr;
175+
return v;
176+
}
177+
178+
private int _slow32() throws IOException {
179+
if (_inputPtr >= _inputEnd) {
180+
loadMoreGuaranteed();
181+
}
182+
int v = _inputBuffer[_inputPtr++]; // sign will disappear anyway
183+
if (_inputPtr >= _inputEnd) {
184+
loadMoreGuaranteed();
185+
}
186+
v = (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
187+
if (_inputPtr >= _inputEnd) {
188+
loadMoreGuaranteed();
189+
}
190+
v = (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
191+
if (_inputPtr >= _inputEnd) {
192+
loadMoreGuaranteed();
193+
}
194+
return (v << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
195+
}
196+
197+
private long _decode64Bits() throws IOException {
198+
int ptr = _inputPtr;
199+
if ((ptr + 7) >= _inputEnd) {
200+
return _slow64();
201+
}
202+
final byte[] b = _inputBuffer;
203+
int i1 = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
204+
int i2 = (b[ptr++] << 24) + ((b[ptr++] & 0xFF) << 16) + ((b[ptr++] & 0xFF) << 8) + (b[ptr++] & 0xFF);
205+
_inputPtr = ptr;
206+
return _long(i1, i2);
207+
}
208+
209+
private long _slow64() throws IOException {
210+
return _long(_decode32Bits(), _decode32Bits());
211+
}
212+
213+
private static long _long(int i1, int i2) {
214+
long l1 = i1;
215+
long l2 = i2;
216+
l2 = (l2 << 32) >>> 32;
217+
return (l1 << 32) + l2;
218+
}
219+
}

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,14 @@
1818

1919
import org.elasticsearch.xcontent.Text;
2020
import org.elasticsearch.xcontent.XContentString;
21+
import org.elasticsearch.xcontent.provider.OptimizedTextCapable;
2122

2223
import java.io.IOException;
2324
import java.io.InputStream;
2425
import java.util.ArrayList;
2526
import java.util.List;
2627

27-
public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser {
28+
public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser implements OptimizedTextCapable {
2829
protected int stringEnd = -1;
2930
protected int stringLength;
3031

@@ -49,6 +50,7 @@ public ESUTF8StreamJsonParser(
4950
* Method that will try to get underlying UTF-8 encoded bytes of the current string token.
5051
* This is only a best-effort attempt; if there is some reason the bytes cannot be retrieved, this method will return null.
5152
*/
53+
@Override
5254
public Text getValueAsText() throws IOException {
5355
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) {
5456
if (stringEnd > 0) {

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.elasticsearch.xcontent.XContentParserConfiguration;
2828
import org.elasticsearch.xcontent.XContentString;
2929
import org.elasticsearch.xcontent.XContentType;
30+
import org.elasticsearch.xcontent.provider.OptimizedTextCapable;
3031
import org.elasticsearch.xcontent.provider.XContentParserConfigurationImpl;
3132
import org.elasticsearch.xcontent.support.AbstractXContentParser;
3233

@@ -151,8 +152,8 @@ public XContentString optimizedText() throws IOException {
151152
if (parser instanceof FilteringParserDelegate delegate) {
152153
parser = delegate.delegate();
153154
}
154-
if (parser instanceof ESUTF8StreamJsonParser esParser) {
155-
var bytesRef = esParser.getValueAsText();
155+
if (parser instanceof OptimizedTextCapable optimizedTextCapableParser) {
156+
var bytesRef = optimizedTextCapableParser.getValueAsText();
156157
if (bytesRef != null) {
157158
return bytesRef;
158159
}

0 commit comments

Comments
 (0)