Fix decoding of non-ASCII field names in ignored source (#132018) (#132034)

jordan-powers · web-flow · commit 4af041434388 · 2025-07-28T21:29:34.000+02:00
When encoding an ignored source entry, we write the string length of the field name (its length in characters), not its encoded byte count. The decode logic, however, treated this value as a byte length, so field names containing non-ASCII characters, whose UTF-8 encoding is longer than their string length, were decoded incorrectly. This patch updates the decode logic to treat the value as the string length instead. (cherry picked from commit 178c0c9)
diff --git a/docs/changelog/132018.yaml b/docs/changelog/132018.yaml
@@ -0,0 +1,5 @@
+pr: 132018
+summary: Fix decoding of non-ascii field names in ignored source
+area: Mapping
+type: bug
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java
@@ -181,8 +181,12 @@ static NameValue decode(Object field) {
         int encodedSize = ByteUtils.readIntLE(bytes, 0);
         int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET;
         int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET;
-        String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8);
-        BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4);
+
+        String decoded = new String(bytes, 4, bytes.length - 4, StandardCharsets.UTF_8);
+        String name = decoded.substring(0, nameSize);
+        int nameByteCount = name.getBytes(StandardCharsets.UTF_8).length;
+
+        BytesRef value = new BytesRef(bytes, 4 + nameByteCount, bytes.length - nameByteCount - 4);
         return new NameValue(name, parentOffset, value, null);
     }
 
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java
@@ -10,12 +10,14 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.index.DirectoryReader;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.CheckedConsumer;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.search.lookup.SourceFilter;
 import org.elasticsearch.test.FieldMaskingReader;
 import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.json.JsonXContent;
 import org.hamcrest.Matchers;
 
 import java.io.IOException;
@@ -122,6 +124,15 @@ public void testIgnoredString() throws IOException {
         );
     }
 
+    public void testIgnoredStringFullUnicode() throws IOException {
+        String value = randomUnicodeOfCodepointLengthBetween(5, 20);
+        String fieldName = randomUnicodeOfCodepointLength(5);
+
+        String expected = Strings.toString(JsonXContent.contentBuilder().startObject().field(fieldName, value).endObject());
+
+        assertEquals(expected, getSyntheticSourceWithFieldLimit(b -> b.field(fieldName, value)));
+    }
+
     public void testIgnoredInt() throws IOException {
         int value = randomInt();
         assertEquals("{\"my_value\":" + value + "}", getSyntheticSourceWithFieldLimit(b -> b.field("my_value", value)));