Skip to content

Commit a8114cc

Browse files
Fix decoding of non-ascii field names in ignored source (#132018) (#132031)
When encoding an ignored source entry, we write the string length of the field name (its number of UTF-16 chars), not its UTF-8 encoded byte count; however, the decode logic treats this stored value as the byte length. The two differ whenever the field name contains non-ASCII characters, so such names were decoded incorrectly. This patch updates the decode logic to treat the value as the string length instead.
1 parent 414aadb commit a8114cc

File tree

3 files changed: +22 -2 lines changed

docs/changelog/132018.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132018
2+
summary: Fix decoding of non-ascii field names in ignored source
3+
area: Mapping
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,12 @@ static NameValue decode(Object field) {
179179
int encodedSize = ByteUtils.readIntLE(bytes, 0);
180180
int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET;
181181
int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET;
182-
String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8);
183-
BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4);
182+
183+
String decoded = new String(bytes, 4, bytes.length - 4, StandardCharsets.UTF_8);
184+
String name = decoded.substring(0, nameSize);
185+
int nameByteCount = name.getBytes(StandardCharsets.UTF_8).length;
186+
187+
BytesRef value = new BytesRef(bytes, 4 + nameByteCount, bytes.length - nameByteCount - 4);
184188
return new NameValue(name, parentOffset, value, null);
185189
}
186190

server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@
1010
package org.elasticsearch.index.mapper;
1111

1212
import org.apache.lucene.index.DirectoryReader;
13+
import org.elasticsearch.common.Strings;
1314
import org.elasticsearch.common.settings.Settings;
1415
import org.elasticsearch.core.CheckedConsumer;
1516
import org.elasticsearch.core.Nullable;
1617
import org.elasticsearch.search.lookup.SourceFilter;
1718
import org.elasticsearch.test.FieldMaskingReader;
1819
import org.elasticsearch.xcontent.XContentBuilder;
20+
import org.elasticsearch.xcontent.json.JsonXContent;
1921
import org.hamcrest.Matchers;
2022

2123
import java.io.IOException;
@@ -122,6 +124,15 @@ public void testIgnoredString() throws IOException {
122124
);
123125
}
124126

127+
public void testIgnoredStringFullUnicode() throws IOException {
128+
String value = randomUnicodeOfCodepointLengthBetween(5, 20);
129+
String fieldName = randomUnicodeOfCodepointLength(5);
130+
131+
String expected = Strings.toString(JsonXContent.contentBuilder().startObject().field(fieldName, value).endObject());
132+
133+
assertEquals(expected, getSyntheticSourceWithFieldLimit(b -> b.field(fieldName, value)));
134+
}
135+
125136
public void testIgnoredInt() throws IOException {
126137
int value = randomInt();
127138
assertEquals("{\"my_value\":" + value + "}", getSyntheticSourceWithFieldLimit(b -> b.field("my_value", value)));

0 commit comments

Comments
 (0)