Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ a pure JSON library.
=== Releases ===
------------------------------------------------------------------------

2.21.2 (not yet released)

#1541: Unexpected Illegal surrogate character when parsing field names
(fix by @cowtowncoder, w/ Claude code)

2.21.1 (22-Feb-2026)

#1548: `StreamReadConstraints.maxDocumentLength` not checked when
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2009,6 +2009,28 @@ protected final String parseEscapedName(int[] quads, int qlen, int currQuad, int
// Nope, escape sequence
ch = _decodeEscaped();
}
// [jackson-core#1541]: Handle JSON-escaped surrogate pairs in field names
if (ch >= 0xD800 && ch <= 0xDBFF) { // high surrogate
// Must be followed by low surrogate escape
if (_inputPtr >= _inputEnd) {
if (!_loadMore()) {
_reportInvalidEOF(" in field name", JsonToken.FIELD_NAME);
}
}
if (_inputBuffer[_inputPtr] != INT_BACKSLASH) {
_reportError("Broken surrogate pair in field name: expected '\\' to start low surrogate, got 0x"
+ Integer.toHexString(_inputBuffer[_inputPtr] & 0xFF));
}
++_inputPtr;
int lo = _decodeEscaped();
if (lo < 0xDC00 || lo > 0xDFFF) {
_reportError(String.format(
"Broken surrogate pair in field name: expected low surrogate, got 0x%04X", lo));
}
ch = 0x10000 + ((ch - 0xD800) << 10) + (lo - 0xDC00);
} else if (ch >= 0xDC00 && ch <= 0xDFFF) { // lone low surrogate
_reportError("Unexpected low surrogate in field name: 0x" + Integer.toHexString(ch));
}
// Oh crap. May need to UTF-8 (re-)encode it, if it's beyond
// 7-bit ASCII. Gets pretty messy. If this happens often, may
// want to use different name canonicalization to avoid these hits.
Expand All @@ -2026,7 +2048,7 @@ protected final String parseEscapedName(int[] quads, int qlen, int currQuad, int
currQuad = (currQuad << 8) | (0xc0 | (ch >> 6));
++currQuadBytes;
// Second byte gets output below:
} else { // 3 bytes; no need to worry about surrogates here
} else if (ch < 0x10000) { // 3 bytes
currQuad = (currQuad << 8) | (0xe0 | (ch >> 12));
++currQuadBytes;
// need room for middle byte?
Expand All @@ -2040,6 +2062,29 @@ protected final String parseEscapedName(int[] quads, int qlen, int currQuad, int
}
currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f));
++currQuadBytes;
} else { // 4 bytes (supplementary character)
currQuad = (currQuad << 8) | (0xf0 | (ch >> 18));
++currQuadBytes;
if (currQuadBytes >= 4) {
if (qlen >= quads.length) {
_quadBuffer = quads = _growNameDecodeBuffer(quads, quads.length);
}
quads[qlen++] = currQuad;
currQuad = 0;
currQuadBytes = 0;
}
currQuad = (currQuad << 8) | (0x80 | ((ch >> 12) & 0x3f));
++currQuadBytes;
if (currQuadBytes >= 4) {
if (qlen >= quads.length) {
_quadBuffer = quads = _growNameDecodeBuffer(quads, quads.length);
}
quads[qlen++] = currQuad;
currQuad = 0;
currQuadBytes = 0;
}
currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f));
++currQuadBytes;
}
// And same last byte in all cases (2-, 3- and 4-byte), gets output below:
ch = 0x80 | (ch & 0x3f);
Expand Down Expand Up @@ -2196,6 +2241,27 @@ protected String _parseAposName() throws IOException
// Nope, escape sequence
ch = _decodeEscaped();
}
// [jackson-core#1541]: Handle JSON-escaped surrogate pairs in field names
if (ch >= 0xD800 && ch <= 0xDBFF) { // high surrogate
if (_inputPtr >= _inputEnd) {
if (!_loadMore()) {
_reportInvalidEOF(" in field name", JsonToken.FIELD_NAME);
}
}
if (_inputBuffer[_inputPtr] != INT_BACKSLASH) {
_reportError("Broken surrogate pair in field name: expected '\\' to start low surrogate, got 0x"
+ Integer.toHexString(_inputBuffer[_inputPtr] & 0xFF));
}
++_inputPtr;
int lo = _decodeEscaped();
if (lo < 0xDC00 || lo > 0xDFFF) {
_reportError(String.format(
"Broken surrogate pair in field name: expected low surrogate, got 0x%04X", lo));
}
ch = 0x10000 + ((ch - 0xD800) << 10) + (lo - 0xDC00);
} else if (ch >= 0xDC00 && ch <= 0xDFFF) { // lone low surrogate
_reportError("Unexpected low surrogate in field name: 0x" + Integer.toHexString(ch));
}
// as per main code, inefficient but will have to do
if (ch > 127) {
// Ok, we'll need room for first byte right away
Expand All @@ -2211,7 +2277,7 @@ protected String _parseAposName() throws IOException
currQuad = (currQuad << 8) | (0xc0 | (ch >> 6));
++currQuadBytes;
// Second byte gets output below:
} else { // 3 bytes; no need to worry about surrogates here
} else if (ch < 0x10000) { // 3 bytes
currQuad = (currQuad << 8) | (0xe0 | (ch >> 12));
++currQuadBytes;
// need room for middle byte?
Expand All @@ -2225,6 +2291,29 @@ protected String _parseAposName() throws IOException
}
currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f));
++currQuadBytes;
} else { // 4 bytes (supplementary character)
currQuad = (currQuad << 8) | (0xf0 | (ch >> 18));
++currQuadBytes;
if (currQuadBytes >= 4) {
if (qlen >= quads.length) {
_quadBuffer = quads = _growNameDecodeBuffer(quads, quads.length);
}
quads[qlen++] = currQuad;
currQuad = 0;
currQuadBytes = 0;
}
currQuad = (currQuad << 8) | (0x80 | ((ch >> 12) & 0x3f));
++currQuadBytes;
if (currQuadBytes >= 4) {
if (qlen >= quads.length) {
_quadBuffer = quads = _growNameDecodeBuffer(quads, quads.length);
}
quads[qlen++] = currQuad;
currQuad = 0;
currQuadBytes = 0;
}
currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f));
++currQuadBytes;
}
// And same last byte in all cases (2-, 3- and 4-byte), gets output below:
ch = 0x80 | (ch & 0x3f);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package com.fasterxml.jackson.core.read;

import java.io.*;

import org.junit.jupiter.api.Test;

import com.fasterxml.jackson.core.*;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Regression tests for [jackson-core#1541]: a supplementary character that appears
 * in a field name as two JSON-escaped surrogates (\ud83d\udc4d) must be decoded
 * correctly when the document is read by the UTF-8 stream parser.
 */
class EscapedSurrogateInFieldName1541Test extends JUnit5TestBase
{
    /** U+1F44D (thumbs up emoji); held by Java as a high/low surrogate pair. */
    private static final String THUMBS_UP = "\uD83D\uDC4D";

    private final JsonFactory FACTORY = newStreamFactory();

    /**
     * Escaped surrogate pair used as field name, document handed to the
     * factory as a plain byte array.
     */
    @Test
    void escapedSurrogatePairInFieldNameBytes() throws Exception
    {
        final byte[] input = escapedPairAsFieldNameDoc();

        try (JsonParser parser = FACTORY.createParser(input)) {
            // Both escapes together must come out as the single emoji code point
            verifySingleEntryObject(parser, THUMBS_UP, "value");
        }
    }

    /**
     * Same document as the byte-array variant, but handed to the factory
     * wrapped in an {@link InputStream}.
     */
    @Test
    void escapedSurrogatePairInFieldNameStream() throws Exception
    {
        final InputStream input = new ByteArrayInputStream(escapedPairAsFieldNameDoc());

        try (JsonParser parser = FACTORY.createParser(input)) {
            verifySingleEntryObject(parser, THUMBS_UP, "value");
        }
    }

    /**
     * Repro taken from issue #1566: a generator writes an object whose field
     * name is a supplementary character, and the resulting bytes are then read
     * back through an {@link InputStream}-backed parser.
     */
    @Test
    void surrogateRoundTripFromIssue1566() throws Exception
    {
        final String name = THUMBS_UP;

        final ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (JsonGenerator g = FACTORY.createGenerator(sink, JsonEncoding.UTF8)) {
            g.writeStartObject();
            g.writeStringField(name, "value");
            g.writeEndObject();
        }

        final byte[] encoded = sink.toByteArray();

        try (JsonParser p = FACTORY.createParser(new ByteArrayInputStream(encoded))) {
            verifySingleEntryObject(p, name, "value");
        }
    }

    /**
     * Sanity check: an escaped surrogate pair inside a String <em>value</em>
     * was handled before the fix and has to keep working.
     */
    @Test
    void escapedSurrogatePairInStringValue() throws Exception
    {
        // JSON: {"key":"\ud83d\udc4d"}
        final byte[] input = {
            '{', '"', 'k', 'e', 'y', '"', ':', '"',
            '\\', 'u', 'd', '8', '3', 'd',
            '\\', 'u', 'd', 'c', '4', 'd',
            '"',
            '}'
        };

        try (JsonParser parser = FACTORY.createParser(new ByteArrayInputStream(input))) {
            verifySingleEntryObject(parser, "key", THUMBS_UP);
        }
    }

    /**
     * Builds the raw bytes of {@code {"<escaped pair>":"value"}}, where the
     * field name consists of two six-character JSON escapes: the high
     * surrogate D83D followed by the low surrogate DC4D.
     */
    private static byte[] escapedPairAsFieldNameDoc()
    {
        return new byte[] {
            '{', '"',
            '\\', 'u', 'd', '8', '3', 'd', // escaped high surrogate
            '\\', 'u', 'd', 'c', '4', 'd', // escaped low surrogate
            '"', ':', '"', 'v', 'a', 'l', 'u', 'e', '"',
            '}'
        };
    }

    /**
     * Walks the parser through an Object holding exactly one String-valued
     * entry, checking each token as well as the decoded name and value.
     */
    private void verifySingleEntryObject(JsonParser parser, String expectedName,
            String expectedValue) throws Exception
    {
        assertToken(JsonToken.START_OBJECT, parser.nextToken());

        assertToken(JsonToken.FIELD_NAME, parser.nextToken());
        assertEquals(expectedName, parser.currentName());

        assertToken(JsonToken.VALUE_STRING, parser.nextToken());
        assertEquals(expectedValue, parser.getText());

        assertToken(JsonToken.END_OBJECT, parser.nextToken());
    }
}