From a2ffbe15142d841c4b3d840f94b55bfd3a3324eb Mon Sep 17 00:00:00 2001
From: Jordan Powers
Date: Mon, 28 Jul 2025 09:42:10 -0700
Subject: [PATCH] Fix decoding of non-ascii field names in ignored source (#132018)

When encoding an ignored source entry, we write the string length of the
field name, not the encoded byte count; however, the decode logic treats
this encoded value as the byte length. This patch updates the decode
logic to instead properly treat the value as the string length.
---
 docs/changelog/132018.yaml                            |  5 +++++
 .../index/mapper/IgnoredSourceFieldMapper.java        |  8 ++++++--
 .../index/mapper/IgnoredSourceFieldMapperTests.java   | 11 +++++++++++
 3 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 docs/changelog/132018.yaml

diff --git a/docs/changelog/132018.yaml b/docs/changelog/132018.yaml
new file mode 100644
index 0000000000000..9032707df8542
--- /dev/null
+++ b/docs/changelog/132018.yaml
@@ -0,0 +1,5 @@
+pr: 132018
+summary: Fix decoding of non-ascii field names in ignored source
+area: Mapping
+type: bug
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java
index 2785a75b4b4c3..19c0adc9e06ab 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java
@@ -179,8 +179,12 @@ static NameValue decode(Object field) {
         int encodedSize = ByteUtils.readIntLE(bytes, 0);
         int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET;
         int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET;
-        String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8);
-        BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4);
+
+        String decoded = new String(bytes, 4, bytes.length - 4, StandardCharsets.UTF_8);
+        String name = decoded.substring(0, nameSize);
+        int nameByteCount = name.getBytes(StandardCharsets.UTF_8).length;
+
+        BytesRef value = new BytesRef(bytes, 4 + nameByteCount, bytes.length - nameByteCount - 4);
         return new NameValue(name, parentOffset, value, null);
     }
 
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java
index 628b64de19bd1..a98ae5219fc44 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java
@@ -10,12 +10,14 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.index.DirectoryReader;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.CheckedConsumer;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.search.lookup.SourceFilter;
 import org.elasticsearch.test.FieldMaskingReader;
 import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.json.JsonXContent;
 import org.hamcrest.Matchers;
 
 import java.io.IOException;
@@ -122,6 +124,15 @@ public void testIgnoredString() throws IOException {
         );
     }
 
+    public void testIgnoredStringFullUnicode() throws IOException {
+        String value = randomUnicodeOfCodepointLengthBetween(5, 20);
+        String fieldName = randomUnicodeOfCodepointLength(5);
+
+        String expected = Strings.toString(JsonXContent.contentBuilder().startObject().field(fieldName, value).endObject());
+
+        assertEquals(expected, getSyntheticSourceWithFieldLimit(b -> b.field(fieldName, value)));
+    }
+
     public void testIgnoredInt() throws IOException {
         int value = randomInt();
         assertEquals("{\"my_value\":" + value + "}", getSyntheticSourceWithFieldLimit(b -> b.field("my_value", value)));