Skip to content
This repository was archived by the owner on May 12, 2025. It is now read-only.

Commit 7a31afa

Browse files
authored
Temporary fix to prevent ingestion failure for invalid surrogate literals (#204)
* Temporary fix to prevent ingestion failure for invalid surrogate literals. As a temporary fix, we detect literals that contain invalid surrogate pairs and throw a ParseError. Later on, this should be reworked to parse and represent invalid surrogate pairs in a safe way.
1 parent de3ab04 commit 7a31afa

File tree

2 files changed

+49
-3
lines changed

2 files changed

+49
-3
lines changed

openrewrite/src/javascript/parser.ts

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import {
2323
randomId,
2424
SourceFile
2525
} from "../core";
26-
import {binarySearch, compareTextSpans, getNextSibling, getPreviousSibling, TextSpan, hasFlowAnnotation, checkSyntaxErrors} from "./parserUtils";
26+
import {binarySearch, compareTextSpans, getNextSibling, getPreviousSibling, TextSpan, hasFlowAnnotation, checkSyntaxErrors, isValidSurrogateRange} from "./parserUtils";
2727
import {JavaScriptTypeMapping} from "./typeMapping";
2828
import path from "node:path";
2929
import {ExpressionStatement, TypeTreeExpression} from ".";
@@ -599,12 +599,19 @@ export class JavaScriptParserVisitor {
599599

600600
private mapLiteral(node: ts.LiteralExpression | ts.TrueLiteral | ts.FalseLiteral | ts.NullLiteral | ts.Identifier
601601
| ts.TemplateHead | ts.TemplateMiddle | ts.TemplateTail, value: any): J.Literal {
602+
603+
let valueSource = node.getText();
604+
if (!isValidSurrogateRange(valueSource)) {
605+
// TODO: Fix to prevent ingestion failure for invalid surrogate pairs. Should be reworked with J.Literal.UnicodeEscape
606+
throw new InvalidSurrogatesNotSupportedError();
607+
}
608+
602609
return new J.Literal(
603610
randomId(),
604611
this.prefix(node),
605612
Markers.EMPTY,
606613
value,
607-
node.getText(),
614+
valueSource,
608615
null,
609616
this.mapPrimitiveType(node)
610617
);
@@ -4116,3 +4123,10 @@ class FlowSyntaxNotSupportedError extends SyntaxError {
41164123
this.name = "FlowSyntaxNotSupportedError";
41174124
}
41184125
}
4126+
4127+
/**
 * Error raised when a literal's source text contains surrogate escapes that do not form a
 * valid pair. It is a `SyntaxError` subclass whose `name` is set to the class name.
 */
class InvalidSurrogatesNotSupportedError extends SyntaxError {
    /**
     * @param message Optional override for the default explanation.
     */
    constructor(message = "String literal contains invalid surrogate pairs, that is not supported") {
        super(message);
        // Replace the inherited "SyntaxError" name with this class's own name.
        this.name = "InvalidSurrogatesNotSupportedError";
    }
}

openrewrite/src/javascript/parserUtils.ts

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,40 @@ const additionalCriticalCodes = new Set([
224224

225225
// Error code descriptions are available at https://github.com/microsoft/TypeScript/blob/main/src/compiler/diagnosticMessages.json
226226
const excludedCodes = new Set([1039, 1064, 1101, 1107, 1111, 1155, 1166, 1170, 1183, 1203, 1207, 1215, 1238, 1239, 1240, 1241, 1244, 1250,
227-
1251, 1252, 1253, 1254, 1308, 1314, 1315, 1324, 1329, 1335, 1338, 1340, 1343, 1344, 1345, 1355, 1360, 1378, 1432]);
227+
1251, 1252, 1253, 1254, 1308, 1314, 1315, 1324, 1329, 1335, 1338, 1340, 1343, 1344, 1345, 1355, 1360, 1375, 1378, 1432]);
228228

229229
/**
 * Decides whether a diagnostic code is treated as critical.
 *
 * A code is critical when it is listed in `additionalCriticalCodes`, or when it lies strictly
 * between 1000 and 2000 and is not listed in `excludedCodes`.
 *
 * @param code Numeric diagnostic code.
 * @returns `true` if the code is considered critical.
 */
function isCriticalDiagnostic(code: number): boolean {
    // Explicitly listed codes are critical regardless of their numeric range.
    if (additionalCriticalCodes.has(code)) {
        return true;
    }

    const inRange = code > 1000 && code < 2000;
    return inRange && !excludedCodes.has(code);
}
232+
233+
/**
 * Checks that every surrogate written as a four-digit `\uXXXX` escape in the given source
 * text is part of a well-formed pair: a high surrogate (U+D800–U+DBFF) escape immediately
 * followed by a low surrogate (U+DC00–U+DFFF) escape.
 *
 * The text is scanned left to right and escape sequences are consumed in order, so:
 * - an escaped backslash (`\\`) never starts a unicode escape, while a real escape that
 *   follows an escaped backslash (`\\\uD83D`) is still examined;
 * - a high and a low surrogate separated by other text (`\uD83Dabc\uDE00`) are treated as
 *   two lone surrogates rather than as a pair.
 *
 * Only the four-digit `\uXXXX` form is inspected; other escapes are skipped.
 *
 * @param unicodeString Raw source text of a literal (escapes not yet decoded).
 * @returns `false` if a lone high or lone low surrogate escape is found, otherwise `true`.
 */
export function isValidSurrogateRange(unicodeString: string): boolean {
    const unicodeEscape = /^\\u[a-fA-F0-9]{4}$/;
    const escapeLength = 6; // length of `\uXXXX`

    const isHighSurrogate = (charCode: number): boolean => charCode >= 0xD800 && charCode <= 0xDBFF;
    const isLowSurrogate = (charCode: number): boolean => charCode >= 0xDC00 && charCode <= 0xDFFF;

    // Returns the code unit of the `\uXXXX` escape starting exactly at `index`, if there is one.
    const readEscapeAt = (index: number): number | undefined => {
        const candidate = unicodeString.slice(index, index + escapeLength);
        return unicodeEscape.test(candidate) ? parseInt(candidate.slice(2), 16) : undefined;
    };

    let i = 0;
    while (i < unicodeString.length) {
        if (unicodeString.charAt(i) !== "\\") {
            i++;
            continue;
        }

        const code = readEscapeAt(i);
        if (code === undefined) {
            // Some other escape (`\\`, `\n`, `\u{...}`, ...): skip the backslash together with
            // the character it escapes so that an escaped backslash cannot start an escape.
            i += 2;
            continue;
        }

        if (isLowSurrogate(code)) {
            return false; // Lone low surrogate (not preceded by a high surrogate)
        }

        if (isHighSurrogate(code)) {
            // The low surrogate must be the very next thing in the text, not merely the next escape.
            const next = readEscapeAt(i + escapeLength);
            if (next === undefined || !isLowSurrogate(next)) {
                return false; // Lone high surrogate
            }
            i += 2 * escapeLength; // Consume the whole pair
            continue;
        }

        i += escapeLength;
    }

    return true;
}

0 commit comments

Comments
 (0)