Skip to content

Commit 942e341

Browse files
authored
Bug fix: Facilitate second retrieval of the same value (#134770) (#134790)
In this PR we attempt to fix #134770, we have identified the issue at the retrieval of the field value for the second time in order to populate the keyword multi-field. We noticed that when the we performed the second retrieval it was not using the recently optimised value, but the value of the initial buffer. We added a new variable that "remembers" the optimised value and allows us to reuse it.
1 parent a8b4020 commit 942e341

File tree

5 files changed

+103
-15
lines changed

5 files changed

+103
-15
lines changed

docs/changelog/134790.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 134790
2+
summary: "Bug fix: Facilitate second retrieval of the same value"
3+
area: Infra/Core
4+
type: bug
5+
issues:
6+
- 134770

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser implements OptimizedTextCapable {
2929
protected int stringEnd = -1;
3030
protected int stringLength;
31+
protected byte[] lastOptimisedValue;
3132

3233
private final List<Integer> backslashes = new ArrayList<>();
3334

@@ -53,6 +54,9 @@ public ESUTF8StreamJsonParser(
5354
@Override
5455
public Text getValueAsText() throws IOException {
5556
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) {
57+
if (lastOptimisedValue != null) {
58+
return new Text(new XContentString.UTF8Bytes(lastOptimisedValue), stringLength);
59+
}
5660
if (stringEnd > 0) {
5761
final int len = stringEnd - 1 - _inputPtr;
5862
return new Text(new XContentString.UTF8Bytes(_inputBuffer, _inputPtr, len), stringLength);
@@ -137,37 +141,40 @@ protected Text _finishAndReturnText() throws IOException {
137141
copyPtr = backslash + 1;
138142
}
139143
System.arraycopy(inputBuffer, copyPtr, buff, destPtr, ptr - copyPtr);
144+
lastOptimisedValue = buff;
140145
return new Text(new XContentString.UTF8Bytes(buff), stringLength);
141146
}
142147
}
143148

144149
@Override
145150
public JsonToken nextToken() throws IOException {
146-
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) {
147-
_inputPtr = stringEnd;
148-
_tokenIncomplete = false;
149-
}
151+
maybeResetCurrentTokenState();
150152
stringEnd = -1;
151153
return super.nextToken();
152154
}
153155

154156
@Override
155157
public boolean nextFieldName(SerializableString str) throws IOException {
156-
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) {
157-
_inputPtr = stringEnd;
158-
_tokenIncomplete = false;
159-
}
158+
maybeResetCurrentTokenState();
160159
stringEnd = -1;
161160
return super.nextFieldName(str);
162161
}
163162

164163
@Override
165164
public String nextFieldName() throws IOException {
165+
maybeResetCurrentTokenState();
166+
stringEnd = -1;
167+
return super.nextFieldName();
168+
}
169+
170+
/**
171+
* Resets the current token state before moving to the next.
172+
*/
173+
private void maybeResetCurrentTokenState() {
166174
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) {
167175
_inputPtr = stringEnd;
168176
_tokenIncomplete = false;
177+
lastOptimisedValue = null;
169178
}
170-
stringEnd = -1;
171-
return super.nextFieldName();
172179
}
173180
}

libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,30 @@ public void testGetValueAsText() throws IOException {
5757
assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.END_OBJECT));
5858
});
5959

60-
testParseJson("{\"foo\": \"bar\\\"baz\\\"\"}", parser -> {
60+
testParseJson("{\"foo\": [\"bar\\\"baz\\\"\", \"foobar\"]}", parser -> {
6161
assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.START_OBJECT));
6262
assertThat(parser.nextFieldName(), Matchers.equalTo("foo"));
63+
64+
assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.START_ARRAY));
6365
assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING));
6466

65-
var text = parser.getValueAsText();
66-
assertThat(text, Matchers.notNullValue());
67-
assertTextRef(text.bytes(), "bar\"baz\"");
67+
var firstText = parser.getValueAsText();
68+
assertThat(firstText, Matchers.notNullValue());
69+
assertTextRef(firstText.bytes(), "bar\"baz\"");
70+
// Retrieve the value for a second time to ensure the last value is available
71+
firstText = parser.getValueAsText();
72+
assertThat(firstText, Matchers.notNullValue());
73+
assertTextRef(firstText.bytes(), "bar\"baz\"");
74+
75+
// Ensure values lastOptimisedValue is reset
76+
assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING));
77+
var secondTest = parser.getValueAsText();
78+
assertThat(secondTest, Matchers.notNullValue());
79+
assertTextRef(secondTest.bytes(), "foobar");
80+
secondTest = parser.getValueAsText();
81+
assertThat(secondTest, Matchers.notNullValue());
82+
assertTextRef(secondTest.bytes(), "foobar");
83+
assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.END_ARRAY));
6884
});
6985

7086
testParseJson("{\"foo\": \"b\\u00e5r\"}", parser -> {
@@ -256,9 +272,17 @@ public void testGetValueRandomized() throws IOException {
256272
var text = parser.getValueAsText();
257273
assertTextRef(text.bytes(), currVal);
258274
assertThat(text.stringLength(), Matchers.equalTo(currVal.length()));
275+
276+
// Retrieve it twice to ensure it works as expected
277+
text = parser.getValueAsText();
278+
assertTextRef(text.bytes(), currVal);
279+
assertThat(text.stringLength(), Matchers.equalTo(currVal.length()));
259280
} else {
260281
assertThat(parser.getValueAsText(), Matchers.nullValue());
261282
assertThat(parser.getValueAsString(), Matchers.equalTo(currVal));
283+
// Retrieve it twice to ensure it works as expected
284+
assertThat(parser.getValueAsText(), Matchers.nullValue());
285+
assertThat(parser.getValueAsString(), Matchers.equalTo(currVal));
262286
}
263287
}
264288
});
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
---
2+
Keyword with escaped characters as multi-field:
3+
- requires:
4+
cluster_features: [ "mapper.multi_field.unicode_optimisation_fix" ]
5+
reason: "requires a fix (#134770)"
6+
- do:
7+
indices.create:
8+
index: test
9+
body:
10+
mappings:
11+
properties:
12+
foo:
13+
type: keyword
14+
fields:
15+
bar:
16+
type: keyword
17+
18+
- do:
19+
index:
20+
index: test
21+
id: "1"
22+
refresh: true
23+
body:
24+
foo: "c:\\windows\\system32\\svchost.exe"
25+
26+
- do:
27+
search:
28+
index: test
29+
body:
30+
query:
31+
term:
32+
foo: "c:\\windows\\system32\\svchost.exe"
33+
34+
- match: { "hits.total.value": 1 }
35+
- match:
36+
hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe"
37+
38+
# Test that optimisation works the same for the multi-fields as well.
39+
- do:
40+
search:
41+
index: test
42+
body:
43+
query:
44+
term:
45+
foo.bar: "c:\\windows\\system32\\svchost.exe"
46+
47+
- match: { "hits.total.value": 1 }
48+
- match:
49+
hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe"

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ public class MapperFeatures implements FeatureSpecification {
4848
static final NodeFeature SEARCH_LOAD_PER_SHARD = new NodeFeature("mapper.search_load_per_shard");
4949
static final NodeFeature PATTERNED_TEXT = new NodeFeature("mapper.patterned_text");
5050
static final NodeFeature IGNORED_SOURCE_FIELDS_PER_ENTRY = new NodeFeature("mapper.ignored_source_fields_per_entry");
51+
public static final NodeFeature MULTI_FIELD_UNICODE_OPTIMISATION_FIX = new NodeFeature("mapper.multi_field.unicode_optimisation_fix");
5152

5253
@Override
5354
public Set<NodeFeature> getTestFeatures() {
@@ -82,7 +83,8 @@ public Set<NodeFeature> getTestFeatures() {
8283
SEARCH_LOAD_PER_SHARD,
8384
SPARSE_VECTOR_INDEX_OPTIONS_FEATURE,
8485
PATTERNED_TEXT,
85-
IGNORED_SOURCE_FIELDS_PER_ENTRY
86+
IGNORED_SOURCE_FIELDS_PER_ENTRY,
87+
MULTI_FIELD_UNICODE_OPTIMISATION_FIX
8688
);
8789
}
8890
}

0 commit comments

Comments
 (0)