Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/134790.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 134790
summary: "Bug fix: Facilitate second retrieval of the same value"
area: Infra/Core
type: bug
issues:
- 134770
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser implements OptimizedTextCapable {
protected int stringEnd = -1;
protected int stringLength;
protected byte[] lastOptimisedValue;

private final List<Integer> backslashes = new ArrayList<>();

Expand All @@ -53,6 +54,9 @@ public ESUTF8StreamJsonParser(
@Override
public Text getValueAsText() throws IOException {
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) {
if (lastOptimisedValue != null) {
return new Text(new XContentString.UTF8Bytes(lastOptimisedValue), stringLength);
}
if (stringEnd > 0) {
final int len = stringEnd - 1 - _inputPtr;
return new Text(new XContentString.UTF8Bytes(_inputBuffer, _inputPtr, len), stringLength);
Expand Down Expand Up @@ -137,6 +141,7 @@ protected Text _finishAndReturnText() throws IOException {
copyPtr = backslash + 1;
}
System.arraycopy(inputBuffer, copyPtr, buff, destPtr, ptr - copyPtr);
lastOptimisedValue = buff;
return new Text(new XContentString.UTF8Bytes(buff), stringLength);
}
}
Expand All @@ -146,6 +151,7 @@ public JsonToken nextToken() throws IOException {
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) {
_inputPtr = stringEnd;
_tokenIncomplete = false;
lastOptimisedValue = null;
}
stringEnd = -1;
return super.nextToken();
Expand All @@ -156,6 +162,7 @@ public boolean nextFieldName(SerializableString str) throws IOException {
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) {
_inputPtr = stringEnd;
_tokenIncomplete = false;
lastOptimisedValue = null;
}
stringEnd = -1;
return super.nextFieldName(str);
Expand All @@ -166,6 +173,7 @@ public String nextFieldName() throws IOException {
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) {
_inputPtr = stringEnd;
_tokenIncomplete = false;
lastOptimisedValue = null;
}
stringEnd = -1;
return super.nextFieldName();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ public void testGetValueAsText() throws IOException {
var text = parser.getValueAsText();
assertThat(text, Matchers.notNullValue());
assertTextRef(text.bytes(), "bar\"baz\"");
// Retrieve the value for a second time to ensure the last value is available
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would make it sense to test the reset of the lastOptimizedValue too? e.g. calling nextFieldName/nextToken + getValueAsText and ensure the content is not stale?

text = parser.getValueAsText();
assertThat(text, Matchers.notNullValue());
assertTextRef(text.bytes(), "bar\"baz\"");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might consider updating testGetValueRandomized() to also double-retrieve the value from the parser.

});

testParseJson("{\"foo\": \"b\\u00e5r\"}", parser -> {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
---
Keyword with escaped characters as multi-field:
- do:
indices.create:
index: test
body:
mappings:
properties:
foo:
type: keyword
fields:
bar:
type: keyword

- do:
index:
index: test
id: "1"
refresh: true
body:
foo: "c:\\windows\\system32\\svchost.exe"

- do:
search:
index: test
body:
query:
term:
foo: "c:\\windows\\system32\\svchost.exe"

- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe"

- do:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If possible, I'd add a comment here pointing to the bug issue, so it's immediately clear why we are calling this twice (a future dev might be tempted to "clean up" things and remove the second call)

search:
index: test
body:
query:
term:
foo.bar: "c:\\windows\\system32\\svchost.exe"

- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe"