Skip to content

Commit 207a425

Browse files
authored
[8.19] Reduce the number of fields per document (elastic#132322) (elastic#134911)
1 parent 5deec04 commit 207a425

File tree

5 files changed

+103
-15
lines changed

5 files changed

+103
-15
lines changed

docs/changelog/134790.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 134790
2+
summary: "Bug fix: Facilitate second retrieval of the same value"
3+
area: Infra/Core
4+
type: bug
5+
issues:
6+
- 134770

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser {
2828
protected int stringEnd = -1;
2929
protected int stringLength;
30+
protected byte[] lastOptimisedValue;
3031

3132
private final List<Integer> backslashes = new ArrayList<>();
3233

@@ -51,6 +52,9 @@ public ESUTF8StreamJsonParser(
5152
*/
5253
public Text getValueAsText() throws IOException {
5354
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) {
55+
if (lastOptimisedValue != null) {
56+
return new Text(new XContentString.UTF8Bytes(lastOptimisedValue), stringLength);
57+
}
5458
if (stringEnd > 0) {
5559
final int len = stringEnd - 1 - _inputPtr;
5660
return new Text(new XContentString.UTF8Bytes(_inputBuffer, _inputPtr, len), stringLength);
@@ -135,37 +139,40 @@ protected Text _finishAndReturnText() throws IOException {
135139
copyPtr = backslash + 1;
136140
}
137141
System.arraycopy(inputBuffer, copyPtr, buff, destPtr, ptr - copyPtr);
142+
lastOptimisedValue = buff;
138143
return new Text(new XContentString.UTF8Bytes(buff), stringLength);
139144
}
140145
}
141146

142147
@Override
143148
public JsonToken nextToken() throws IOException {
144-
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) {
145-
_inputPtr = stringEnd;
146-
_tokenIncomplete = false;
147-
}
149+
maybeResetCurrentTokenState();
148150
stringEnd = -1;
149151
return super.nextToken();
150152
}
151153

152154
@Override
153155
public boolean nextFieldName(SerializableString str) throws IOException {
154-
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) {
155-
_inputPtr = stringEnd;
156-
_tokenIncomplete = false;
157-
}
156+
maybeResetCurrentTokenState();
158157
stringEnd = -1;
159158
return super.nextFieldName(str);
160159
}
161160

162161
@Override
163162
public String nextFieldName() throws IOException {
163+
maybeResetCurrentTokenState();
164+
stringEnd = -1;
165+
return super.nextFieldName();
166+
}
167+
168+
/**
169+
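* Resets the state of the current token before moving to the next one: when the current token is an incomplete string value whose end is known (stringEnd > 0), skips the input pointer past it, marks the token as complete and clears lastOptimisedValue.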
170+
*/
171+
private void maybeResetCurrentTokenState() {
164172
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) {
165173
_inputPtr = stringEnd;
166174
_tokenIncomplete = false;
175+
lastOptimisedValue = null;
167176
}
168-
stringEnd = -1;
169-
return super.nextFieldName();
170177
}
171178
}

libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,30 @@ public void testGetValueAsText() throws IOException {
5757
assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.END_OBJECT));
5858
});
5959

60-
testParseJson("{\"foo\": \"bar\\\"baz\\\"\"}", parser -> {
60+
testParseJson("{\"foo\": [\"bar\\\"baz\\\"\", \"foobar\"]}", parser -> {
6161
assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.START_OBJECT));
6262
assertThat(parser.nextFieldName(), Matchers.equalTo("foo"));
63+
64+
assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.START_ARRAY));
6365
assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING));
6466

65-
var text = parser.getValueAsText();
66-
assertThat(text, Matchers.notNullValue());
67-
assertTextRef(text.bytes(), "bar\"baz\"");
67+
var firstText = parser.getValueAsText();
68+
assertThat(firstText, Matchers.notNullValue());
69+
assertTextRef(firstText.bytes(), "bar\"baz\"");
70+
// Retrieve the value for a second time to ensure the last value is available
71+
firstText = parser.getValueAsText();
72+
assertThat(firstText, Matchers.notNullValue());
73+
assertTextRef(firstText.bytes(), "bar\"baz\"");
74+
75+
// Ensure lastOptimisedValue is reset when moving on to the next value
76+
assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING));
77+
var secondTest = parser.getValueAsText();
78+
assertThat(secondTest, Matchers.notNullValue());
79+
assertTextRef(secondTest.bytes(), "foobar");
80+
secondTest = parser.getValueAsText();
81+
assertThat(secondTest, Matchers.notNullValue());
82+
assertTextRef(secondTest.bytes(), "foobar");
83+
assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.END_ARRAY));
6884
});
6985

7086
testParseJson("{\"foo\": \"b\\u00e5r\"}", parser -> {
@@ -256,9 +272,17 @@ public void testGetValueRandomized() throws IOException {
256272
var text = parser.getValueAsText();
257273
assertTextRef(text.bytes(), currVal);
258274
assertThat(text.stringLength(), Matchers.equalTo(currVal.length()));
275+
276+
// Retrieve it twice to ensure it works as expected
277+
text = parser.getValueAsText();
278+
assertTextRef(text.bytes(), currVal);
279+
assertThat(text.stringLength(), Matchers.equalTo(currVal.length()));
259280
} else {
260281
assertThat(parser.getValueAsText(), Matchers.nullValue());
261282
assertThat(parser.getValueAsString(), Matchers.equalTo(currVal));
283+
// Retrieve it twice to ensure it works as expected
284+
assertThat(parser.getValueAsText(), Matchers.nullValue());
285+
assertThat(parser.getValueAsString(), Matchers.equalTo(currVal));
262286
}
263287
}
264288
});
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
---
2+
Keyword with escaped characters as multi-field:
3+
- requires:
4+
cluster_features: [ "mapper.multi_field.unicode_optimisation_fix" ]
5+
reason: "requires a fix (#134770)"
6+
- do:
7+
indices.create:
8+
index: test
9+
body:
10+
mappings:
11+
properties:
12+
foo:
13+
type: keyword
14+
fields:
15+
bar:
16+
type: keyword
17+
18+
- do:
19+
index:
20+
index: test
21+
id: "1"
22+
refresh: true
23+
body:
24+
foo: "c:\\windows\\system32\\svchost.exe"
25+
26+
- do:
27+
search:
28+
index: test
29+
body:
30+
query:
31+
term:
32+
foo: "c:\\windows\\system32\\svchost.exe"
33+
34+
- match: { "hits.total.value": 1 }
35+
- match:
36+
hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe"
37+
38+
# Test that optimisation works the same for the multi-fields as well.
39+
- do:
40+
search:
41+
index: test
42+
body:
43+
query:
44+
term:
45+
foo.bar: "c:\\windows\\system32\\svchost.exe"
46+
47+
- match: { "hits.total.value": 1 }
48+
- match:
49+
hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe"

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ public Set<NodeFeature> getFeatures() {
7373
"mapper.unknown_field_mapping_update_error_message"
7474
);
7575
static final NodeFeature NPE_ON_DIMS_UPDATE_FIX = new NodeFeature("mapper.npe_on_dims_update_fix");
76+
public static final NodeFeature MULTI_FIELD_UNICODE_OPTIMISATION_FIX = new NodeFeature("mapper.multi_field.unicode_optimisation_fix");
7677

7778
@Override
7879
public Set<NodeFeature> getTestFeatures() {
@@ -99,7 +100,8 @@ public Set<NodeFeature> getTestFeatures() {
99100
RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING,
100101
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING,
101102
USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ,
102-
SPARSE_VECTOR_INDEX_OPTIONS_FEATURE
103+
SPARSE_VECTOR_INDEX_OPTIONS_FEATURE,
104+
MULTI_FIELD_UNICODE_OPTIMISATION_FIX
103105
);
104106
}
105107
}

0 commit comments

Comments
 (0)