Skip to content

Commit 4af0414

Browse files
Fix decoding of non-ascii field names in ignored source (#132018) (#132034)
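When encoding an ignored source entry, we write the string length of the field name (its number of characters), not its UTF-8 encoded byte count; however, the decode logic treated this encoded value as the byte length, which is only correct when every character of the name is ASCII. This patch updates the decode logic to properly treat the value as the string length and to derive the name's byte count from the decoded name instead. (cherry picked from commit 178c0c9)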
1 parent 286865a commit 4af0414

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

docs/changelog/132018.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132018
2+
summary: Fix decoding of non-ascii field names in ignored source
3+
area: Mapping
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,12 @@ static NameValue decode(Object field) {
181181
int encodedSize = ByteUtils.readIntLE(bytes, 0);
182182
int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET;
183183
int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET;
184-
String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8);
185-
BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4);
184+
185+
String decoded = new String(bytes, 4, bytes.length - 4, StandardCharsets.UTF_8);
186+
String name = decoded.substring(0, nameSize);
187+
int nameByteCount = name.getBytes(StandardCharsets.UTF_8).length;
188+
189+
BytesRef value = new BytesRef(bytes, 4 + nameByteCount, bytes.length - nameByteCount - 4);
186190
return new NameValue(name, parentOffset, value, null);
187191
}
188192

server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@
1010
package org.elasticsearch.index.mapper;
1111

1212
import org.apache.lucene.index.DirectoryReader;
13+
import org.elasticsearch.common.Strings;
1314
import org.elasticsearch.common.settings.Settings;
1415
import org.elasticsearch.core.CheckedConsumer;
1516
import org.elasticsearch.core.Nullable;
1617
import org.elasticsearch.search.lookup.SourceFilter;
1718
import org.elasticsearch.test.FieldMaskingReader;
1819
import org.elasticsearch.xcontent.XContentBuilder;
20+
import org.elasticsearch.xcontent.json.JsonXContent;
1921
import org.hamcrest.Matchers;
2022

2123
import java.io.IOException;
@@ -122,6 +124,15 @@ public void testIgnoredString() throws IOException {
122124
);
123125
}
124126

127+
public void testIgnoredStringFullUnicode() throws IOException {
128+
String value = randomUnicodeOfCodepointLengthBetween(5, 20);
129+
String fieldName = randomUnicodeOfCodepointLength(5);
130+
131+
String expected = Strings.toString(JsonXContent.contentBuilder().startObject().field(fieldName, value).endObject());
132+
133+
assertEquals(expected, getSyntheticSourceWithFieldLimit(b -> b.field(fieldName, value)));
134+
}
135+
125136
public void testIgnoredInt() throws IOException {
126137
int value = randomInt();
127138
assertEquals("{\"my_value\":" + value + "}", getSyntheticSourceWithFieldLimit(b -> b.field("my_value", value)));

0 commit comments

Comments
 (0)