FasterXML · cowtowncoder · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026
diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x
@@ -14,6 +14,11 @@ a pure JSON library.
 === Releases ===
 ------------------------------------------------------------------------
 
+2.21.2 (not yet released)
+
+#1541: Unexpected Illegal surrogate character when parsing field names
+ (fix by @cowtowncoder, w/ Claude code)
+
 2.21.1 (22-Feb-2026)
 
 #1548: `StreamReadConstraints.maxDocumentLength` not checked when

diff --git a/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java b/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java
@@ -2009,6 +2009,28 @@ protected final String parseEscapedName(int[] quads, int qlen, int currQuad, int
                     // Nope, escape sequence
                     ch = _decodeEscaped();
                 }
+                // [jackson-core#1541]: Handle JSON-escaped surrogate pairs in field names
+                if (ch >= 0xD800 && ch <= 0xDBFF) { // high surrogate
+                    // Must be followed by low surrogate escape
+                    if (_inputPtr >= _inputEnd) {
+                        if (!_loadMore()) {
+                            _reportInvalidEOF(" in field name", JsonToken.FIELD_NAME);
+                        }
+                    }
+                    if (_inputBuffer[_inputPtr] != INT_BACKSLASH) {
+                        _reportError("Broken surrogate pair in field name: expected '\\' to start low surrogate, got 0x"
+                                + Integer.toHexString(_inputBuffer[_inputPtr] & 0xFF));
+                    }
+                    ++_inputPtr;
+                    int lo = _decodeEscaped();
+                    if (lo < 0xDC00 || lo > 0xDFFF) {
+                        _reportError(String.format(
+                                "Broken surrogate pair in field name: expected low surrogate, got 0x%04X", lo));
+                    }
+                    ch = 0x10000 + ((ch - 0xD800) << 10) + (lo - 0xDC00);
+                } else if (ch >= 0xDC00 && ch <= 0xDFFF) { // lone low surrogate
+                    _reportError("Unexpected low surrogate in field name: 0x" + Integer.toHexString(ch));
+                }
                 // Oh crap. May need to UTF-8 (re-)encode it, if it's beyond
                 // 7-bit ASCII. Gets pretty messy. If this happens often, may
                 // want to use different name canonicalization to avoid these hits.
@@ -2026,7 +2048,7 @@ protected final String parseEscapedName(int[] quads, int qlen, int currQuad, int
                         currQuad = (currQuad << 8) | (0xc0 | (ch >> 6));
                         ++currQuadBytes;
                         // Second byte gets output below:
-                    } else { // 3 bytes; no need to worry about surrogates here
+                    } else if (ch < 0x10000) { // 3 bytes
                         currQuad = (currQuad << 8) | (0xe0 | (ch >> 12));
                         ++currQuadBytes;
                         // need room for middle byte?
@@ -2040,6 +2062,29 @@ protected final String parseEscapedName(int[] quads, int qlen, int currQuad, int
                         }
                         currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f));
                         ++currQuadBytes;
+                    } else { // 4 bytes (supplementary character)
+                        currQuad = (currQuad << 8) | (0xf0 | (ch >> 18));
+                        ++currQuadBytes;
+                        if (currQuadBytes >= 4) {
+                            if (qlen >= quads.length) {
+                                _quadBuffer = quads = _growNameDecodeBuffer(quads, quads.length);
+                            }
+                            quads[qlen++] = currQuad;
+                            currQuad = 0;
+                            currQuadBytes = 0;
+                        }
+                        currQuad = (currQuad << 8) | (0x80 | ((ch >> 12) & 0x3f));
+                        ++currQuadBytes;
+                        if (currQuadBytes >= 4) {
+                            if (qlen >= quads.length) {
+                                _quadBuffer = quads = _growNameDecodeBuffer(quads, quads.length);
+                            }
+                            quads[qlen++] = currQuad;
+                            currQuad = 0;
+                            currQuadBytes = 0;
+                        }
+                        currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f));
+                        ++currQuadBytes;
                     }
                     // And same last byte in both cases, gets output below:
                     ch = 0x80 | (ch & 0x3f);
@@ -2196,6 +2241,27 @@ protected String _parseAposName() throws IOException
                     // Nope, escape sequence
                     ch = _decodeEscaped();
                 }
+                // [jackson-core#1541]: Handle JSON-escaped surrogate pairs in field names
+                if (ch >= 0xD800 && ch <= 0xDBFF) { // high surrogate
+                    if (_inputPtr >= _inputEnd) {
+                        if (!_loadMore()) {
+                            _reportInvalidEOF(" in field name", JsonToken.FIELD_NAME);
+                        }
+                    }
+                    if (_inputBuffer[_inputPtr] != INT_BACKSLASH) {
+                        _reportError("Broken surrogate pair in field name: expected '\\' to start low surrogate, got 0x"
+                                + Integer.toHexString(_inputBuffer[_inputPtr] & 0xFF));
+                    }
+                    ++_inputPtr;
+                    int lo = _decodeEscaped();
+                    if (lo < 0xDC00 || lo > 0xDFFF) {
+                        _reportError(String.format(
+                                "Broken surrogate pair in field name: expected low surrogate, got 0x%04X", lo));
+                    }
+                    ch = 0x10000 + ((ch - 0xD800) << 10) + (lo - 0xDC00);
+                } else if (ch >= 0xDC00 && ch <= 0xDFFF) { // lone low surrogate
+                    _reportError("Unexpected low surrogate in field name: 0x" + Integer.toHexString(ch));
+                }
                 // as per main code, inefficient but will have to do
                 if (ch > 127) {
                     // Ok, we'll need room for first byte right away
@@ -2211,7 +2277,7 @@ protected String _parseAposName() throws IOException
                         currQuad = (currQuad << 8) | (0xc0 | (ch >> 6));
                         ++currQuadBytes;
                         // Second byte gets output below:
-                    } else { // 3 bytes; no need to worry about surrogates here
+                    } else if (ch < 0x10000) { // 3 bytes
                         currQuad = (currQuad << 8) | (0xe0 | (ch >> 12));
                         ++currQuadBytes;
                         // need room for middle byte?
@@ -2225,6 +2291,29 @@ protected String _parseAposName() throws IOException
                         }
                         currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f));
                         ++currQuadBytes;
+                    } else { // 4 bytes (supplementary character)
+                        currQuad = (currQuad << 8) | (0xf0 | (ch >> 18));
+                        ++currQuadBytes;
+                        if (currQuadBytes >= 4) {
+                            if (qlen >= quads.length) {
+                                _quadBuffer = quads = _growNameDecodeBuffer(quads, quads.length);
+                            }
+                            quads[qlen++] = currQuad;
+                            currQuad = 0;
+                            currQuadBytes = 0;
+                        }
+                        currQuad = (currQuad << 8) | (0x80 | ((ch >> 12) & 0x3f));
+                        ++currQuadBytes;
+                        if (currQuadBytes >= 4) {
+                            if (qlen >= quads.length) {
+                                _quadBuffer = quads = _growNameDecodeBuffer(quads, quads.length);
+                            }
+                            quads[qlen++] = currQuad;
+                            currQuad = 0;
+                            currQuadBytes = 0;
+                        }
+                        currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f));
+                        ++currQuadBytes;
                     }
                     // And same last byte in both cases, gets output below:
                     ch = 0x80 | (ch & 0x3f);

diff --git a/src/test/java/com/fasterxml/jackson/core/read/EscapedSurrogateInFieldName1541Test.java b/src/test/java/com/fasterxml/jackson/core/read/EscapedSurrogateInFieldName1541Test.java
@@ -0,0 +1,126 @@
+package com.fasterxml.jackson.core.read;
+
+import java.io.*;
+
+import org.junit.jupiter.api.Test;
+
+import com.fasterxml.jackson.core.*;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Tests for [jackson-core#1541]: JSON-escaped surrogate pairs (\ud83d\udc4d) in
+ * field names should be decoded correctly by the UTF-8 stream parser.
+ */
+class EscapedSurrogateInFieldName1541Test extends JUnit5TestBase
+{
+    private final JsonFactory FACTORY = newStreamFactory();
+
+    /**
+     * Test that JSON-escaped surrogate pair in field name is accepted when
+     * parsing from a byte array (UTF-8 parser path).
+     */
+    @Test
+    void escapedSurrogatePairInFieldNameBytes() throws Exception
+    {
+        // JSON: {"\ud83d\udc4d":"value"}
+        byte[] doc = new byte[] {
+            '{', '"',
+            '\\', 'u', 'd', '8', '3', 'd',  // JSON escape: \ud83d (high surrogate)
+            '\\', 'u', 'd', 'c', '4', 'd',  // JSON escape: \udc4d (low surrogate)
+            '"', ':', '"', 'v', 'a', 'l', 'u', 'e', '"',
+            '}'
+        };
+
+        try (JsonParser p = FACTORY.createParser(doc)) {
+            assertToken(JsonToken.START_OBJECT, p.nextToken());
+            assertToken(JsonToken.FIELD_NAME, p.nextToken());
+            // The escaped surrogate pair should decode to U+1F44D (thumbs up emoji)
+            assertEquals("\uD83D\uDC4D", p.currentName());
+            assertToken(JsonToken.VALUE_STRING, p.nextToken());
+            assertEquals("value", p.getText());
+            assertToken(JsonToken.END_OBJECT, p.nextToken());
+        }
+    }
+
+    /**
+     * Test that JSON-escaped surrogate pair in field name is accepted when
+     * parsing from an InputStream (streaming UTF-8 parser path).
+     */
+    @Test
+    void escapedSurrogatePairInFieldNameStream() throws Exception
+    {
+        // JSON: {"\ud83d\udc4d":"value"}
+        byte[] doc = new byte[] {
+            '{', '"',
+            '\\', 'u', 'd', '8', '3', 'd',
+            '\\', 'u', 'd', 'c', '4', 'd',
+            '"', ':', '"', 'v', 'a', 'l', 'u', 'e', '"',
+            '}'
+        };
+
+        try (JsonParser p = FACTORY.createParser(new ByteArrayInputStream(doc))) {
+            assertToken(JsonToken.START_OBJECT, p.nextToken());
+            assertToken(JsonToken.FIELD_NAME, p.nextToken());
+            assertEquals("\uD83D\uDC4D", p.currentName());
+            assertToken(JsonToken.VALUE_STRING, p.nextToken());
+            assertEquals("value", p.getText());
+            assertToken(JsonToken.END_OBJECT, p.nextToken());
+        }
+    }
+
+    /**
+     * Round-trip test for [jackson-core#1541]: write supplementary character
+     * as a field name with generator, then read it back with streaming parser.
+     */
+    @Test
+    void surrogateRoundTripFromIssue1541() throws Exception
+    {
+        // U+1F44D is a supplementary character, stored in Java as surrogate pair \uD83D\uDC4D
+        String fieldName = "\uD83D\uDC4D";
+
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        try (JsonGenerator gen = FACTORY.createGenerator(out, JsonEncoding.UTF8)) {
+            gen.writeStartObject();
+            gen.writeStringField(fieldName, "value");
+            gen.writeEndObject();
+        }
+
+        byte[] json = out.toByteArray();
+
+        try (JsonParser parser = FACTORY.createParser(new ByteArrayInputStream(json))) {
+            assertToken(JsonToken.START_OBJECT, parser.nextToken());
+            assertToken(JsonToken.FIELD_NAME, parser.nextToken());
+            assertEquals(fieldName, parser.currentName());
+            assertToken(JsonToken.VALUE_STRING, parser.nextToken());
+            assertEquals("value", parser.getText());
+            assertToken(JsonToken.END_OBJECT, parser.nextToken());
+        }
+    }
+
+    /**
+     * Test that JSON-escaped surrogate pair in string value still works
+     * (this was already working before, but let's verify it's not broken).
+     */
+    @Test
+    void escapedSurrogatePairInStringValue() throws Exception
+    {
+        // JSON: {"key":"\ud83d\udc4d"}
+        byte[] doc = new byte[] {
+            '{', '"', 'k', 'e', 'y', '"', ':', '"',
+            '\\', 'u', 'd', '8', '3', 'd',
+            '\\', 'u', 'd', 'c', '4', 'd',
+            '"',
+            '}'
+        };
+
+        try (JsonParser p = FACTORY.createParser(new ByteArrayInputStream(doc))) {
+            assertToken(JsonToken.START_OBJECT, p.nextToken());
+            assertToken(JsonToken.FIELD_NAME, p.nextToken());
+            assertEquals("key", p.currentName());
+            assertToken(JsonToken.VALUE_STRING, p.nextToken());
+            assertEquals("\uD83D\uDC4D", p.getText());
+            assertToken(JsonToken.END_OBJECT, p.nextToken());
+        }
+    }
+}