[JsonPath] Fix parsing invalid Unicode codepoints

nicolas-grekas · nicolas-grekas · commit 66c8a1a51258 · 2025-07-22T13:36:39.000+02:00
diff --git a/src/Symfony/Component/JsonPath/JsonCrawler.php b/src/Symfony/Component/JsonPath/JsonCrawler.php
@@ -12,6 +12,7 @@
 namespace Symfony\Component\JsonPath;
 
 use Symfony\Component\JsonPath\Exception\InvalidArgumentException;
+use Symfony\Component\JsonPath\Exception\InvalidJsonPathException;
 use Symfony\Component\JsonPath\Exception\InvalidJsonStringInputException;
 use Symfony\Component\JsonPath\Exception\JsonCrawlerException;
 use Symfony\Component\JsonPath\Tokenizer\JsonPathToken;
@@ -83,7 +84,7 @@ private function evaluate(JsonPath $query): array
             return $this->evaluateTokensOnDecodedData($tokens, $data);
         } catch (InvalidArgumentException $e) {
             throw $e;
-        } catch (\Throwable $e) {
+        } catch (InvalidJsonPathException $e) {
             throw new JsonCrawlerException($query, $e->getMessage(), previous: $e);
         }
     }
@@ -329,7 +330,7 @@ private function evaluateBracket(string $expr, mixed $value): array
             return \array_key_exists($key, $value) ? [$value[$key]] : [];
         }
 
-        throw new \LogicException(\sprintf('Unsupported bracket expression "%s".', $expr));
+        throw new InvalidJsonPathException(\sprintf('Unsupported bracket expression "%s".', $expr));
     }
 
     private function evaluateFilter(string $expr, mixed $value): array
diff --git a/src/Symfony/Component/JsonPath/JsonPathUtils.php b/src/Symfony/Component/JsonPath/JsonPathUtils.php
@@ -117,7 +117,7 @@ public static function unescapeString(string $str, string $quoteChar): string
                     't' => "\t",
                     'u' => self::unescapeUnicodeSequence($str, $i),
                     $quoteChar => $quoteChar,
-                    default => throw new JsonCrawlerException('', \sprintf('Invalid escape sequence "\\%s" in %s-quoted string', $str[$i + 1], "'" === $quoteChar ? 'single' : 'double')),
+                    default => throw new JsonCrawlerException('', \sprintf('Invalid escape sequence "\\%s" in %s-quoted string.', $str[$i + 1], "'" === $quoteChar ? 'single' : 'double')),
                 };
 
                 ++$i;
@@ -132,30 +132,33 @@ public static function unescapeString(string $str, string $quoteChar): string
     private static function unescapeUnicodeSequence(string $str, int &$i): string
     {
         if (!isset($str[$i + 5]) || !ctype_xdigit(substr($str, $i + 2, 4))) {
-            throw new JsonCrawlerException('', 'Invalid unicode escape sequence');
+            throw new JsonCrawlerException('', 'Invalid unicode escape sequence.');
         }
 
-        $hex = substr($str, $i + 2, 4);
+        $codepoint = hexdec(substr($str, $i + 2, 4));
 
-        $codepoint = hexdec($hex);
         // looks like a valid Unicode codepoint, string length is sufficient and it starts with \u
-        if (0xD800 <= $codepoint && $codepoint <= 0xDBFF && isset($str[$i + 11]) && '\\' === $str[$i + 6] && 'u' === $str[$i + 7]) {
-            $lowHex = substr($str, $i + 8, 4);
-            if (ctype_xdigit($lowHex)) {
-                $lowSurrogate = hexdec($lowHex);
-                if (0xDC00 <= $lowSurrogate && $lowSurrogate <= 0xDFFF) {
-                    $codepoint = 0x10000 + (($codepoint & 0x3FF) << 10) + ($lowSurrogate & 0x3FF);
-                    $i += 10; // skip surrogate pair
-
-                    return mb_chr($codepoint, 'UTF-8');
-                }
-            }
+        if (0xD800 <= $codepoint
+            && $codepoint <= 0xDBFF
+            && isset($str[$i + 11])
+            && '\\' === $str[$i + 6]
+            && 'u' === $str[$i + 7]
+            && ctype_xdigit($lowSurrogate = substr($str, $i + 8, 4))
+            && 0xDC00 <= ($lowSurrogate = hexdec($lowSurrogate))
+            && $lowSurrogate <= 0xDFFF
+        ) {
+            $codepoint = 0x10000 + (($codepoint & 0x3FF) << 10) + ($lowSurrogate & 0x3FF);
+            $i += 10; // skip surrogate pair
+        } else {
+            // single Unicode character or invalid surrogate, skip the sequence
+            $i += 4;
         }
 
-        // single Unicode character or invalid surrogate, skip the sequence
-        $i += 4;
+        if (false === $chr = mb_chr($codepoint, 'UTF-8')) {
+            throw new JsonCrawlerException('', \sprintf('Invalid Unicode codepoint: U+%04X.', $codepoint));
+        }
 
-        return mb_chr($codepoint, 'UTF-8');
+        return $chr;
     }
 
     /**

Original file line number	Diff line number	Diff line change
`@@ -12,6 +12,7 @@`
`12`	`12`	`namespace Symfony\Component\JsonPath;`
`13`	`13`
`14`	`14`	`use Symfony\Component\JsonPath\Exception\InvalidArgumentException;`
	`15`	`+use Symfony\Component\JsonPath\Exception\InvalidJsonPathException;`
`15`	`16`	`use Symfony\Component\JsonPath\Exception\InvalidJsonStringInputException;`
`16`	`17`	`use Symfony\Component\JsonPath\Exception\JsonCrawlerException;`
`17`	`18`	`use Symfony\Component\JsonPath\Tokenizer\JsonPathToken;`
`@@ -83,7 +84,7 @@ private function evaluate(JsonPath $query): array`
`83`	`84`	`return $this->evaluateTokensOnDecodedData($tokens, $data);`
`84`	`85`	`} catch (InvalidArgumentException $e) {`
`85`	`86`	`throw $e;`
`86`		`- } catch (\Throwable $e) {`
	`87`	`+ } catch (InvalidJsonPathException $e) {`
`87`	`88`	`throw new JsonCrawlerException($query, $e->getMessage(), previous: $e);`
`88`	`89`	`}`
`89`	`90`	`}`
`@@ -329,7 +330,7 @@ private function evaluateBracket(string $expr, mixed $value): array`
`329`	`330`	`return \array_key_exists($key, $value) ? [$value[$key]] : [];`
`330`	`331`	`}`
`331`	`332`
`332`		`- throw new \LogicException(\sprintf('Unsupported bracket expression "%s".', $expr));`
	`333`	`+ throw new InvalidJsonPathException(\sprintf('Unsupported bracket expression "%s".', $expr));`
`333`	`334`	`}`
`334`	`335`
`335`	`336`	`private function evaluateFilter(string $expr, mixed $value): array`