Fix decoding of non-ascii field names in ignored source (#132018) (#132031)

jordan-powers · web-flow · commit a8114cceb272 · 2025-07-28T19:50:41.000+02:00
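When encoding an ignored source entry, we write the string length of the
field name (its number of UTF-16 chars), not the encoded UTF-8 byte count;
however, the decode logic treats this encoded value as the byte length.
The two differ whenever the field name contains non-ASCII characters, so
such names were decoded incorrectly. This patch updates the decode logic
to instead properly treat the value as the string length.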
diff --git a/docs/changelog/132018.yaml b/docs/changelog/132018.yaml
@@ -0,0 +1,5 @@
+pr: 132018
+summary: Fix decoding of non-ascii field names in ignored source
+area: Mapping
+type: bug
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java
@@ -179,8 +179,12 @@ static NameValue decode(Object field) {
         int encodedSize = ByteUtils.readIntLE(bytes, 0);
         int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET;
         int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET;
-        String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8);
-        BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4);
+
+        String decoded = new String(bytes, 4, bytes.length - 4, StandardCharsets.UTF_8);
+        String name = decoded.substring(0, nameSize);
+        int nameByteCount = name.getBytes(StandardCharsets.UTF_8).length;
+
+        BytesRef value = new BytesRef(bytes, 4 + nameByteCount, bytes.length - nameByteCount - 4);
         return new NameValue(name, parentOffset, value, null);
     }
 
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java
@@ -10,12 +10,14 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.index.DirectoryReader;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.CheckedConsumer;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.search.lookup.SourceFilter;
 import org.elasticsearch.test.FieldMaskingReader;
 import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.json.JsonXContent;
 import org.hamcrest.Matchers;
 
 import java.io.IOException;
@@ -122,6 +124,15 @@ public void testIgnoredString() throws IOException {
         );
     }
 
+    public void testIgnoredStringFullUnicode() throws IOException {
+        String value = randomUnicodeOfCodepointLengthBetween(5, 20);
+        String fieldName = randomUnicodeOfCodepointLength(5);
+
+        String expected = Strings.toString(JsonXContent.contentBuilder().startObject().field(fieldName, value).endObject());
+
+        assertEquals(expected, getSyntheticSourceWithFieldLimit(b -> b.field(fieldName, value)));
+    }
+
     public void testIgnoredInt() throws IOException {
         int value = randomInt();
         assertEquals("{\"my_value\":" + value + "}", getSyntheticSourceWithFieldLimit(b -> b.field("my_value", value)));