Skip to content

Commit dcbf2cb

Browse files
committed
Use immutable XContentText.EncodedBytes record for encoded Text
We actually can't use the native java ByteBuffer class here because it's a mutable class (since it keeps track of how many bytes have been consumed), which creates concurrency issues.
1 parent 614ec17 commit dcbf2cb

File tree

6 files changed

+63
-25
lines changed

6 files changed

+63
-25
lines changed

libs/x-content/src/main/java/org/elasticsearch/xcontent/Text.java

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
import java.nio.charset.StandardCharsets;
1414

1515
/**
16-
* Both {@link String} and {@link ByteBuffer} representation of the text. Starts with one of those, and if
17-
* the other is requests, caches the other one in a local reference so no additional conversion will be needed.
16+
* Both {@link String} and {@link EncodedBytes} representation of the text. Starts with one of those, and if
17+
* the other is requested, caches the other one in a local reference so no additional conversion will be needed.
1818
*/
1919
public final class Text implements XContentString, Comparable<Text>, ToXContentFragment {
2020

@@ -31,7 +31,7 @@ public static Text[] convertFromStringArray(String[] strings) {
3131
return texts;
3232
}
3333

34-
private ByteBuffer bytes;
34+
private EncodedBytes bytes;
3535
private String text;
3636
private int hash;
3737
private int stringLength = -1;
@@ -40,15 +40,15 @@ public static Text[] convertFromStringArray(String[] strings) {
4040
* Construct a Text from a UTF-8 encoded ByteBuffer. Since no string length is specified, {@link #stringLength()}
4141
* will perform a string conversion to measure the string length.
4242
*/
43-
public Text(ByteBuffer bytes) {
43+
public Text(EncodedBytes bytes) {
4444
this.bytes = bytes;
4545
}
4646

4747
/**
4848
* Construct a Text from a UTF-8 encoded ByteBuffer and an explicit string length. Used to avoid string conversion
4949
* in {@link #stringLength()}.
5050
*/
51-
public Text(ByteBuffer bytes, int stringLength) {
51+
public Text(EncodedBytes bytes, int stringLength) {
5252
this.bytes = bytes;
5353
this.stringLength = stringLength;
5454
}
@@ -58,16 +58,18 @@ public Text(String text) {
5858
}
5959

6060
/**
61-
* Whether a {@link ByteBuffer} view of the data is already materialized.
61+
* Whether an {@link EncodedBytes} view of the data is already materialized.
6262
*/
6363
public boolean hasBytes() {
6464
return bytes != null;
6565
}
6666

6767
@Override
68-
public ByteBuffer bytes() {
68+
public EncodedBytes bytes() {
6969
if (bytes == null) {
70-
bytes = StandardCharsets.UTF_8.encode(text);
70+
var byteBuff = StandardCharsets.UTF_8.encode(text);
71+
assert byteBuff.hasArray();
72+
bytes = new EncodedBytes(byteBuff.array(), byteBuff.arrayOffset() + byteBuff.position(), byteBuff.remaining());
7173
}
7274
return bytes;
7375
}
@@ -82,7 +84,8 @@ public boolean hasString() {
8284
@Override
8385
public String string() {
8486
if (text == null) {
85-
text = StandardCharsets.UTF_8.decode(bytes).toString();
87+
var byteBuff = ByteBuffer.wrap(bytes.bytes(), bytes.offset(), bytes.length());
88+
text = StandardCharsets.UTF_8.decode(byteBuff).toString();
8689
}
8790
return text;
8891
}
@@ -131,8 +134,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
131134
} else {
132135
// TODO: TextBytesOptimization we can use a buffer here to convert it? maybe add a
133136
// request to jackson to support InputStream as well?
134-
assert bytes.hasArray();
135-
return builder.utf8Value(bytes.array(), bytes.arrayOffset() + bytes.position(), bytes.remaining());
137+
return builder.utf8Value(bytes.bytes(), bytes.offset(), bytes.length());
136138
}
137139
}
138140
}

libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentString.java

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,34 @@
1212
import java.nio.ByteBuffer;
1313

1414
public interface XContentString {
15+
record EncodedBytes(byte[] bytes, int offset, int length) implements Comparable<EncodedBytes> {
16+
public EncodedBytes(byte[] bytes) {
17+
this(bytes, 0, bytes.length);
18+
}
19+
20+
@Override
21+
public int compareTo(EncodedBytes o) {
22+
return ByteBuffer.wrap(bytes, offset, length).compareTo(ByteBuffer.wrap(o.bytes, o.offset, o.length));
23+
}
24+
25+
@Override
26+
public boolean equals(Object o) {
27+
if (this == o) {
28+
return true;
29+
}
30+
if (o == null || getClass() != o.getClass()) {
31+
return false;
32+
}
33+
34+
return this.compareTo((EncodedBytes) o) == 0;
35+
}
36+
37+
@Override
38+
public int hashCode() {
39+
return ByteBuffer.wrap(bytes, offset, length).hashCode();
40+
}
41+
}
42+
1543
/**
1644
* Returns a {@link String} view of the data.
1745
*/
@@ -20,7 +48,7 @@ public interface XContentString {
2048
/**
2149
* Returns a UTF8-encoded {@link ByteBuffer} view of the data.
2250
*/
23-
ByteBuffer bytes();
51+
EncodedBytes bytes();
2452

2553
/**
2654
* Returns the number of characters in the represented string.

server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.elasticsearch.core.Nullable;
3030
import org.elasticsearch.core.TimeValue;
3131
import org.elasticsearch.xcontent.Text;
32+
import org.elasticsearch.xcontent.XContentString;
3233

3334
import java.io.EOFException;
3435
import java.io.FilterInputStream;
@@ -395,8 +396,8 @@ public Text readOptionalText() throws IOException {
395396
if (length > 0) {
396397
readBytes(bytes, 0, length);
397398
}
398-
var byteBuff = ByteBuffer.wrap(bytes);
399-
return new Text(byteBuff);
399+
var encoded = new XContentString.EncodedBytes(bytes);
400+
return new Text(encoded);
400401
}
401402

402403
public Text readText() throws IOException {
@@ -406,8 +407,8 @@ public Text readText() throws IOException {
406407
if (length > 0) {
407408
readBytes(bytes, 0, length);
408409
}
409-
var byteBuff = ByteBuffer.wrap(bytes);
410-
return new Text(byteBuff);
410+
var encoded = new XContentString.EncodedBytes(bytes);
411+
return new Text(encoded);
411412
}
412413

413414
@Nullable

server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,8 @@ public void writeText(Text text) throws IOException {
419419
writeInt(spare.length());
420420
write(spare.bytes(), 0, spare.length());
421421
} else {
422-
BytesReference bytes = BytesReference.fromByteBuffer(text.bytes());
422+
var encoded = text.bytes();
423+
BytesReference bytes = new BytesArray(encoded.bytes(), encoded.offset(), encoded.length());
423424
writeInt(bytes.length());
424425
bytes.writeTo(this);
425426
}

server/src/main/java/org/elasticsearch/search/suggest/term/TermSuggester.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
2121
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator;
2222
import org.elasticsearch.xcontent.Text;
23+
import org.elasticsearch.xcontent.XContentString;
2324

2425
import java.io.IOException;
25-
import java.nio.ByteBuffer;
2626
import java.util.ArrayList;
2727
import java.util.List;
2828

@@ -48,7 +48,8 @@ public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion
4848
suggestion.getDirectSpellCheckerSettings().suggestMode()
4949
);
5050
var termBytes = token.term.bytes();
51-
Text key = new Text(ByteBuffer.wrap(termBytes.bytes, termBytes.offset, termBytes.length));
51+
var termEncoded = new XContentString.EncodedBytes(termBytes.bytes, termBytes.offset, termBytes.length);
52+
Text key = new Text(termEncoded);
5253
TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset);
5354
for (SuggestWord suggestWord : suggestedWords) {
5455
Text word = new Text(suggestWord.string);
@@ -98,7 +99,8 @@ protected TermSuggestion emptySuggestion(String name, TermSuggestionContext sugg
9899
List<Token> tokens = queryTerms(suggestion, spare);
99100
for (Token token : tokens) {
100101
var termBytes = token.term.bytes();
101-
Text key = new Text(ByteBuffer.wrap(termBytes.bytes, termBytes.offset, termBytes.length));
102+
var termEncoded = new XContentString.EncodedBytes(termBytes.bytes, termBytes.offset, termBytes.length);
103+
Text key = new Text(termEncoded);
102104
TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset);
103105
termSuggestion.addTerm(resultEntry);
104106
}

server/src/test/java/org/elasticsearch/common/xcontent/BaseXContentTestCase.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.elasticsearch.xcontent.XContentParser;
3535
import org.elasticsearch.xcontent.XContentParser.Token;
3636
import org.elasticsearch.xcontent.XContentParserConfiguration;
37+
import org.elasticsearch.xcontent.XContentString;
3738
import org.elasticsearch.xcontent.XContentType;
3839
import org.hamcrest.Matcher;
3940
import org.hamcrest.Matchers;
@@ -377,15 +378,18 @@ public void testText() throws Exception {
377378
assertResult("{'text':''}", () -> builder().startObject().field("text", new Text("")).endObject());
378379
assertResult("{'text':'foo bar'}", () -> builder().startObject().field("text", new Text("foo bar")).endObject());
379380

380-
final var random = ByteBuffer.wrap(randomBytes());
381-
XContentBuilder builder = builder().startObject().field("text", new Text(random)).endObject();
381+
final var random = randomBytes();
382+
XContentBuilder builder = builder().startObject().field("text", new Text(new XContentString.EncodedBytes(random))).endObject();
382383

383384
try (XContentParser parser = createParser(xcontentType().xContent(), BytesReference.bytes(builder))) {
384385
assertSame(parser.nextToken(), Token.START_OBJECT);
385386
assertSame(parser.nextToken(), Token.FIELD_NAME);
386387
assertEquals(parser.currentName(), "text");
387388
assertTrue(parser.nextToken().isValue());
388-
assertThat(new BytesRef(parser.charBuffer()).utf8ToString(), equalTo(StandardCharsets.UTF_8.decode(random).toString()));
389+
assertThat(
390+
new BytesRef(parser.charBuffer()).utf8ToString(),
391+
equalTo(StandardCharsets.UTF_8.decode(ByteBuffer.wrap(random)).toString())
392+
);
389393
assertSame(parser.nextToken(), Token.END_OBJECT);
390394
assertNull(parser.nextToken());
391395
}
@@ -593,7 +597,7 @@ public void testObjects() throws Exception {
593597
objects.put("{'objects':['a','b','c']}", new Object[] { "a", "b", "c" });
594598
objects.put(
595599
"{'objects':['a','b','c']}",
596-
new Object[] { new Text("a"), new Text(StandardCharsets.UTF_8.encode("b")), new Text("c") }
600+
new Object[] { new Text("a"), new Text(new XContentString.EncodedBytes("b".getBytes(StandardCharsets.UTF_8))), new Text("c") }
597601
);
598602
objects.put("{'objects':null}", null);
599603
objects.put("{'objects':[null,null,null]}", new Object[] { null, null, null });
@@ -640,7 +644,7 @@ public void testObject() throws Exception {
640644
object.put("{'object':1}", (short) 1);
641645
object.put("{'object':'string'}", "string");
642646
object.put("{'object':'a'}", new Text("a"));
643-
object.put("{'object':'b'}", new Text(StandardCharsets.UTF_8.encode("b")));
647+
object.put("{'object':'b'}", new Text(new XContentString.EncodedBytes("b".getBytes(StandardCharsets.UTF_8))));
644648
object.put("{'object':null}", null);
645649
object.put("{'object':'OPEN'}", IndexMetadata.State.OPEN);
646650
object.put("{'object':'NM'}", DistanceUnit.NAUTICALMILES);

0 commit comments

Comments
 (0)