[9.1] Disable utf-8 parsing optimization (#135180)

martijnvg · web-flow · commit 6e5d99abdcd5 · 2025-09-22T14:22:46.000+02:00
* Disable utf-8 parsing optimization (#135172) A yet-to-be-understood issue has emerged with the utf8 parsing optimization, and in the meantime we should disable the optimization. It looks like the wrong values are returned for fields. In the case of the test failures here, that surfaced as being unable to parse fields with IPs, causing index requests to fail. However, the impact could be larger. * fix test compile errors
diff --git a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java
@@ -143,15 +143,7 @@ public String text() throws IOException {
 
     @Override
     public XContentString optimizedText() throws IOException {
-        if (currentToken().isValue() == false) {
-            throwOnNoText();
-        }
-        if (parser instanceof ESUTF8StreamJsonParser esParser) {
-            var bytesRef = esParser.getValueAsText();
-            if (bytesRef != null) {
-                return bytesRef;
-            }
-        }
+        // TODO: enable utf-8 parsing optimization once verified it is completely safe
         return new Text(text());
     }
 
diff --git a/server/src/test/java/org/elasticsearch/common/xcontent/json/JsonXContentTests.java b/server/src/test/java/org/elasticsearch/common/xcontent/json/JsonXContentTests.java
@@ -9,7 +9,10 @@
 
 package org.elasticsearch.common.xcontent.json;
 
+import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.xcontent.BaseXContentTestCase;
+import org.elasticsearch.xcontent.Text;
+import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentGenerator;
 import org.elasticsearch.xcontent.XContentParseException;
 import org.elasticsearch.xcontent.XContentParser;
@@ -18,6 +21,9 @@
 import org.elasticsearch.xcontent.json.JsonXContent;
 
 import java.io.ByteArrayOutputStream;
+import java.util.Set;
+
+import static org.hamcrest.Matchers.equalTo;
 
 public class JsonXContentTests extends BaseXContentTestCase {
 
@@ -41,4 +47,21 @@ public void testMalformedJsonFieldThrowsXContentException() throws Exception {
             assertThrows(XContentParseException.class, () -> parser.text());
         }
     }
+
+    public void testOptimizedTextHasBytes() throws Exception {
+        XContentBuilder builder = builder().startObject().field("text", new Text("foo")).endObject();
+        XContentParserConfiguration parserConfig = parserConfig();
+        if (randomBoolean()) {
+            parserConfig = parserConfig.withFiltering(null, Set.of("*"), null, true);
+        }
+        try (XContentParser parser = createParser(parserConfig, xcontentType().xContent(), BytesReference.bytes(builder))) {
+            assertSame(XContentParser.Token.START_OBJECT, parser.nextToken());
+            assertSame(XContentParser.Token.FIELD_NAME, parser.nextToken());
+            assertTrue(parser.nextToken().isValue());
+            Text text = (Text) parser.optimizedText();
+            // TODO: uncomment after utf8 optimized parsing has been enabled again:
+            // assertTrue(text.hasBytes());
+            assertThat(text.string(), equalTo("foo"));
+        }
+    }
 }