diff --git a/internal/printer/utilities.go b/internal/printer/utilities.go index cc3839cba5a..02b3e7e51ff 100644 --- a/internal/printer/utilities.go +++ b/internal/printer/utilities.go @@ -71,6 +71,18 @@ func encodeUtf16EscapeSequence(b *strings.Builder, charCode rune) { b.WriteString(hexCharCode) } +// decodeCESU8OrUTF8 decodes the first rune of s and returns it together with its width in bytes. It recognizes the CESU-8 encoded surrogate +// code units (0xD800–0xDFFF, i.e. the 3-byte form 0xED 0xA0–0xBF 0x80–0xBF) that the scanner produces for lone surrogates in +// string literals and returns them as width-3 runes. Standard utf8.DecodeRuneInString would reject those bytes as invalid UTF-8 and yield +// U+FFFD (utf8.RuneError) with width 1, losing the distinction between different surrogates. Any other input is decoded by utf8.DecodeRuneInString unchanged. +func decodeCESU8OrUTF8(s string) (rune, int) { + if len(s) >= 3 && s[0] == 0xED && s[1] >= 0xA0 && s[1] <= 0xBF && s[2] >= 0x80 && s[2] <= 0xBF { + r := rune(0xD000) | rune(s[1]&0x3F)<<6 | rune(s[2]&0x3F) + return r, 3 + } + return utf8.DecodeRuneInString(s) +} + // Based heavily on the abstract 'Quote'/'QuoteJSONString' operation from ECMA-262 (24.3.2.2), // but augmented for a few select characters (e.g. lineSeparator, paragraphSeparator, nextLine) // Note that this doesn't actually wrap the input in double quotes. 
@@ -78,7 +90,7 @@ func escapeStringWorker(s string, quoteChar QuoteChar, flags getLiteralTextFlags pos := 0 i := 0 for i < len(s) { - ch, size := utf8.DecodeRuneInString(s[i:]) + ch, size := decodeCESU8OrUTF8(s[i:]) escape := false @@ -104,7 +116,8 @@ func escapeStringWorker(s string, quoteChar QuoteChar, flags getLiteralTextFlags escape = true } default: - if ch <= '\u001f' || flags&getLiteralTextFlagsNeverAsciiEscape == 0 && ch > '\u007f' { + if ch <= '\u001f' || flags&getLiteralTextFlagsNeverAsciiEscape == 0 && ch > '\u007f' || + ch >= 0xD800 && ch <= 0xDFFF { escape = true } } diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go index 4f6de45cca4..4dd3b5bcd6c 100644 --- a/internal/scanner/scanner.go +++ b/internal/scanner/scanner.go @@ -1749,6 +1749,9 @@ func (s *Scanner) scanEscapeSequence(flags EscapeSequenceScanningFlags) string { if codePoint < 0 { return s.text[start:s.pos] } + if codePointIsHighSurrogate(codePoint) || codePointIsLowSurrogate(codePoint) { + return encodeSurrogate(codePoint) + } return string(codePoint) } if codePoint < 0 { @@ -1764,13 +1767,11 @@ func (s *Scanner) scanEscapeSequence(flags EscapeSequenceScanningFlags) string { return string(surrogatePairToCodepoint(codePoint, nextCodePoint)) } s.pos = savedPos - if flags&EscapeSequenceScanningFlagsRegularExpression != 0 { - return encodeSurrogate(codePoint) - } - } else if (codePointIsHighSurrogate(codePoint) || codePointIsLowSurrogate(codePoint)) && - flags&EscapeSequenceScanningFlagsRegularExpression != 0 { - // Lone surrogate inside a non-unicode regex: encode as CESU-8 so scanClassRanges - // can compare surrogates numerically. Must NOT apply to string literals. + return encodeSurrogate(codePoint) + } else if codePointIsHighSurrogate(codePoint) || codePointIsLowSurrogate(codePoint) { + // Lone surrogate: encode as CESU-8 so that distinct surrogates remain + // distinguishable. Go's string(rune) would replace all surrogates with + // U+FFFD, collapsing e.g. 
"\uD800" and "\uDC00" into the same value. return encodeSurrogate(codePoint) } return string(codePoint) diff --git a/internal/stringutil/compare.go b/internal/stringutil/compare.go index e632405f145..64be7b76c7c 100644 --- a/internal/stringutil/compare.go +++ b/internal/stringutil/compare.go @@ -36,8 +36,8 @@ func CompareStringsCaseInsensitive(a string, b string) Comparison { return ComparisonEqual } for { - ca, sa := utf8.DecodeRuneInString(a) - cb, sb := utf8.DecodeRuneInString(b) + ca, sa := decodeCESU8OrUTF8(a) + cb, sb := decodeCESU8OrUTF8(b) if sa == 0 { if sb == 0 { return ComparisonEqual @@ -60,6 +60,18 @@ func CompareStringsCaseInsensitive(a string, b string) Comparison { } } +// decodeCESU8OrUTF8 decodes the first rune of s and returns it together with its width in bytes. It recognizes the CESU-8 encoded surrogate +// code units (0xD800–0xDFFF, i.e. the 3-byte form 0xED 0xA0–0xBF 0x80–0xBF) that the scanner produces for lone surrogates in +// string literals and returns them as width-3 runes. Standard utf8.DecodeRuneInString would see these as invalid +// UTF-8 and return RuneError for each byte individually. Any other input is decoded by utf8.DecodeRuneInString unchanged. +func decodeCESU8OrUTF8(s string) (rune, int) { + if len(s) >= 3 && s[0] == 0xED && s[1] >= 0xA0 && s[1] <= 0xBF && s[2] >= 0x80 && s[2] <= 0xBF { + r := rune(0xD000) | rune(s[1]&0x3F)<<6 | rune(s[2]&0x3F) + return r, 3 + } + return utf8.DecodeRuneInString(s) +} + func CompareStringsCaseSensitive(a string, b string) Comparison { return strings.Compare(a, b) } diff --git a/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.errors.txt b/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.errors.txt new file mode 100644 index 00000000000..9ea30044801 --- /dev/null +++ b/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.errors.txt @@ -0,0 +1,61 @@ +loneSurrogateStringLiterals.ts(6,7): error TS2322: Type '"\uDC00"' is not assignable to type '"\uD800"'. +loneSurrogateStringLiterals.ts(7,7): error TS2322: Type '"\uD800"' is not assignable to type '"\uDC00"'. 
+loneSurrogateStringLiterals.ts(10,7): error TS2322: Type '"\uD801"' is not assignable to type '"\uD800"'. +loneSurrogateStringLiterals.ts(11,7): error TS2322: Type '"\uD800"' is not assignable to type '"\uD801"'. +loneSurrogateStringLiterals.ts(14,7): error TS2322: Type '"\uDC01"' is not assignable to type '"\uDC00"'. +loneSurrogateStringLiterals.ts(15,7): error TS2322: Type '"\uDC00"' is not assignable to type '"\uDC01"'. +loneSurrogateStringLiterals.ts(20,7): error TS2322: Type '"\uDC00"' is not assignable to type '"\uD800"'. +loneSurrogateStringLiterals.ts(21,7): error TS2322: Type '"\uD800"' is not assignable to type '"\uDC00"'. +loneSurrogateStringLiterals.ts(26,7): error TS2322: Type '"\uDC00"' is not assignable to type '"\uD800"'. +loneSurrogateStringLiterals.ts(27,7): error TS2322: Type '"\uDC00"' is not assignable to type '"\uD800"'. + + +==== loneSurrogateStringLiterals.ts (10 errors) ==== + // Lone surrogates should be distinct string literal types + const highSurrogate: "\uD800" = "\uD800"; // ok + const lowSurrogate: "\uDC00" = "\uDC00"; // ok + + // These should be errors - different surrogates are not assignable to each other + const highToLow: "\uD800" = "\uDC00"; // error + ~~~~~~~~~ +!!! error TS2322: Type '"\uDC00"' is not assignable to type '"\uD800"'. + const lowToHigh: "\uDC00" = "\uD800"; // error + ~~~~~~~~~ +!!! error TS2322: Type '"\uD800"' is not assignable to type '"\uDC00"'. + + // Different high surrogates should also be distinct + const high1: "\uD800" = "\uD801"; // error + ~~~~~ +!!! error TS2322: Type '"\uD801"' is not assignable to type '"\uD800"'. + const high2: "\uD801" = "\uD800"; // error + ~~~~~ +!!! error TS2322: Type '"\uD800"' is not assignable to type '"\uD801"'. + + // Different low surrogates should also be distinct + const low1: "\uDC00" = "\uDC01"; // error + ~~~~ +!!! error TS2322: Type '"\uDC01"' is not assignable to type '"\uDC00"'. + const low2: "\uDC01" = "\uDC00"; // error + ~~~~ +!!! 
error TS2322: Type '"\uDC00"' is not assignable to type '"\uDC01"'. + + // Extended Unicode escape syntax should also work + const extHigh: "\u{D800}" = "\u{D800}"; // ok + const extLow: "\u{DC00}" = "\u{DC00}"; // ok + const extHighToLow: "\u{D800}" = "\u{DC00}"; // error + ~~~~~~~~~~~~ +!!! error TS2322: Type '"\uDC00"' is not assignable to type '"\uD800"'. + const extLowToHigh: "\u{DC00}" = "\u{D800}"; // error + ~~~~~~~~~~~~ +!!! error TS2322: Type '"\uD800"' is not assignable to type '"\uDC00"'. + + // Mixed syntax should also be equivalent + const mixedHigh: "\uD800" = "\u{D800}"; // ok + const mixedLow: "\u{DC00}" = "\uDC00"; // ok + const mixedError1: "\uD800" = "\u{DC00}"; // error + ~~~~~~~~~~~ +!!! error TS2322: Type '"\uDC00"' is not assignable to type '"\uD800"'. + const mixedError2: "\u{D800}" = "\uDC00"; // error + ~~~~~~~~~~~ +!!! error TS2322: Type '"\uDC00"' is not assignable to type '"\uD800"'. + \ No newline at end of file diff --git a/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.js b/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.js new file mode 100644 index 00000000000..bf6091a098c --- /dev/null +++ b/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.js @@ -0,0 +1,56 @@ +//// [tests/cases/compiler/loneSurrogateStringLiterals.ts] //// + +//// [loneSurrogateStringLiterals.ts] +// Lone surrogates should be distinct string literal types +const highSurrogate: "\uD800" = "\uD800"; // ok +const lowSurrogate: "\uDC00" = "\uDC00"; // ok + +// These should be errors - different surrogates are not assignable to each other +const highToLow: "\uD800" = "\uDC00"; // error +const lowToHigh: "\uDC00" = "\uD800"; // error + +// Different high surrogates should also be distinct +const high1: "\uD800" = "\uD801"; // error +const high2: "\uD801" = "\uD800"; // error + +// Different low surrogates should also be distinct +const low1: "\uDC00" = "\uDC01"; // error +const low2: "\uDC01" = "\uDC00"; // error + 
+// Extended Unicode escape syntax should also work +const extHigh: "\u{D800}" = "\u{D800}"; // ok +const extLow: "\u{DC00}" = "\u{DC00}"; // ok +const extHighToLow: "\u{D800}" = "\u{DC00}"; // error +const extLowToHigh: "\u{DC00}" = "\u{D800}"; // error + +// Mixed syntax should also be equivalent +const mixedHigh: "\uD800" = "\u{D800}"; // ok +const mixedLow: "\u{DC00}" = "\uDC00"; // ok +const mixedError1: "\uD800" = "\u{DC00}"; // error +const mixedError2: "\u{D800}" = "\uDC00"; // error + + +//// [loneSurrogateStringLiterals.js] +"use strict"; +// Lone surrogates should be distinct string literal types +const highSurrogate = "\uD800"; // ok +const lowSurrogate = "\uDC00"; // ok +// These should be errors - different surrogates are not assignable to each other +const highToLow = "\uDC00"; // error +const lowToHigh = "\uD800"; // error +// Different high surrogates should also be distinct +const high1 = "\uD801"; // error +const high2 = "\uD800"; // error +// Different low surrogates should also be distinct +const low1 = "\uDC01"; // error +const low2 = "\uDC00"; // error +// Extended Unicode escape syntax should also work +const extHigh = "\u{D800}"; // ok +const extLow = "\u{DC00}"; // ok +const extHighToLow = "\u{DC00}"; // error +const extLowToHigh = "\u{D800}"; // error +// Mixed syntax should also be equivalent +const mixedHigh = "\u{D800}"; // ok +const mixedLow = "\uDC00"; // ok +const mixedError1 = "\u{DC00}"; // error +const mixedError2 = "\uDC00"; // error diff --git a/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.symbols b/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.symbols new file mode 100644 index 00000000000..16c16252a12 --- /dev/null +++ b/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.symbols @@ -0,0 +1,57 @@ +//// [tests/cases/compiler/loneSurrogateStringLiterals.ts] //// + +=== loneSurrogateStringLiterals.ts === +// Lone surrogates should be distinct string literal types +const 
highSurrogate: "\uD800" = "\uD800"; // ok +>highSurrogate : Symbol(highSurrogate, Decl(loneSurrogateStringLiterals.ts, 1, 5)) + +const lowSurrogate: "\uDC00" = "\uDC00"; // ok +>lowSurrogate : Symbol(lowSurrogate, Decl(loneSurrogateStringLiterals.ts, 2, 5)) + +// These should be errors - different surrogates are not assignable to each other +const highToLow: "\uD800" = "\uDC00"; // error +>highToLow : Symbol(highToLow, Decl(loneSurrogateStringLiterals.ts, 5, 5)) + +const lowToHigh: "\uDC00" = "\uD800"; // error +>lowToHigh : Symbol(lowToHigh, Decl(loneSurrogateStringLiterals.ts, 6, 5)) + +// Different high surrogates should also be distinct +const high1: "\uD800" = "\uD801"; // error +>high1 : Symbol(high1, Decl(loneSurrogateStringLiterals.ts, 9, 5)) + +const high2: "\uD801" = "\uD800"; // error +>high2 : Symbol(high2, Decl(loneSurrogateStringLiterals.ts, 10, 5)) + +// Different low surrogates should also be distinct +const low1: "\uDC00" = "\uDC01"; // error +>low1 : Symbol(low1, Decl(loneSurrogateStringLiterals.ts, 13, 5)) + +const low2: "\uDC01" = "\uDC00"; // error +>low2 : Symbol(low2, Decl(loneSurrogateStringLiterals.ts, 14, 5)) + +// Extended Unicode escape syntax should also work +const extHigh: "\u{D800}" = "\u{D800}"; // ok +>extHigh : Symbol(extHigh, Decl(loneSurrogateStringLiterals.ts, 17, 5)) + +const extLow: "\u{DC00}" = "\u{DC00}"; // ok +>extLow : Symbol(extLow, Decl(loneSurrogateStringLiterals.ts, 18, 5)) + +const extHighToLow: "\u{D800}" = "\u{DC00}"; // error +>extHighToLow : Symbol(extHighToLow, Decl(loneSurrogateStringLiterals.ts, 19, 5)) + +const extLowToHigh: "\u{DC00}" = "\u{D800}"; // error +>extLowToHigh : Symbol(extLowToHigh, Decl(loneSurrogateStringLiterals.ts, 20, 5)) + +// Mixed syntax should also be equivalent +const mixedHigh: "\uD800" = "\u{D800}"; // ok +>mixedHigh : Symbol(mixedHigh, Decl(loneSurrogateStringLiterals.ts, 23, 5)) + +const mixedLow: "\u{DC00}" = "\uDC00"; // ok +>mixedLow : Symbol(mixedLow, 
Decl(loneSurrogateStringLiterals.ts, 24, 5)) + +const mixedError1: "\uD800" = "\u{DC00}"; // error +>mixedError1 : Symbol(mixedError1, Decl(loneSurrogateStringLiterals.ts, 25, 5)) + +const mixedError2: "\u{D800}" = "\uDC00"; // error +>mixedError2 : Symbol(mixedError2, Decl(loneSurrogateStringLiterals.ts, 26, 5)) + diff --git a/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.types b/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.types new file mode 100644 index 00000000000..1451573d294 --- /dev/null +++ b/testdata/baselines/reference/compiler/loneSurrogateStringLiterals.types @@ -0,0 +1,73 @@ +//// [tests/cases/compiler/loneSurrogateStringLiterals.ts] //// + +=== loneSurrogateStringLiterals.ts === +// Lone surrogates should be distinct string literal types +const highSurrogate: "\uD800" = "\uD800"; // ok +>highSurrogate : "\uD800" +>"\uD800" : "\uD800" + +const lowSurrogate: "\uDC00" = "\uDC00"; // ok +>lowSurrogate : "\uDC00" +>"\uDC00" : "\uDC00" + +// These should be errors - different surrogates are not assignable to each other +const highToLow: "\uD800" = "\uDC00"; // error +>highToLow : "\uD800" +>"\uDC00" : "\uDC00" + +const lowToHigh: "\uDC00" = "\uD800"; // error +>lowToHigh : "\uDC00" +>"\uD800" : "\uD800" + +// Different high surrogates should also be distinct +const high1: "\uD800" = "\uD801"; // error +>high1 : "\uD800" +>"\uD801" : "\uD801" + +const high2: "\uD801" = "\uD800"; // error +>high2 : "\uD801" +>"\uD800" : "\uD800" + +// Different low surrogates should also be distinct +const low1: "\uDC00" = "\uDC01"; // error +>low1 : "\uDC00" +>"\uDC01" : "\uDC01" + +const low2: "\uDC01" = "\uDC00"; // error +>low2 : "\uDC01" +>"\uDC00" : "\uDC00" + +// Extended Unicode escape syntax should also work +const extHigh: "\u{D800}" = "\u{D800}"; // ok +>extHigh : "\uD800" +>"\u{D800}" : "\uD800" + +const extLow: "\u{DC00}" = "\u{DC00}"; // ok +>extLow : "\uDC00" +>"\u{DC00}" : "\uDC00" + +const extHighToLow: "\u{D800}" = 
"\u{DC00}"; // error +>extHighToLow : "\uD800" +>"\u{DC00}" : "\uDC00" + +const extLowToHigh: "\u{DC00}" = "\u{D800}"; // error +>extLowToHigh : "\uDC00" +>"\u{D800}" : "\uD800" + +// Mixed syntax should also be equivalent +const mixedHigh: "\uD800" = "\u{D800}"; // ok +>mixedHigh : "\uD800" +>"\u{D800}" : "\uD800" + +const mixedLow: "\u{DC00}" = "\uDC00"; // ok +>mixedLow : "\uDC00" +>"\uDC00" : "\uDC00" + +const mixedError1: "\uD800" = "\u{DC00}"; // error +>mixedError1 : "\uD800" +>"\u{DC00}" : "\uDC00" + +const mixedError2: "\u{D800}" = "\uDC00"; // error +>mixedError2 : "\uD800" +>"\uDC00" : "\uDC00" + diff --git a/testdata/baselines/reference/compiler/unicodeSurrogatesInStringLiterals.js b/testdata/baselines/reference/compiler/unicodeSurrogatesInStringLiterals.js index e33c239550e..0bb9e03002e 100644 --- a/testdata/baselines/reference/compiler/unicodeSurrogatesInStringLiterals.js +++ b/testdata/baselines/reference/compiler/unicodeSurrogatesInStringLiterals.js @@ -37,8 +37,8 @@ export const lowHigh = "\ude03\ud83d"; //// [unicodeSurrogatesInStringLiterals.d.ts] export declare const highLow: "\uD83D\uDE03"; -export declare const high: "\uFFFD"; -export declare const low: "\uFFFD"; -export declare const highHigh: "\uFFFD\uFFFD"; -export declare const lowLow: "\uFFFD\uFFFD"; -export declare const lowHigh: "\uFFFD\uFFFD"; +export declare const high: "\uD83D"; +export declare const low: "\uDE03"; +export declare const highHigh: "\uD83D\uD83D"; +export declare const lowLow: "\uDE03\uDE03"; +export declare const lowHigh: "\uDE03\uD83D"; diff --git a/testdata/baselines/reference/compiler/unicodeSurrogatesInStringLiterals.types b/testdata/baselines/reference/compiler/unicodeSurrogatesInStringLiterals.types index 26778de70b4..628cad72d66 100644 --- a/testdata/baselines/reference/compiler/unicodeSurrogatesInStringLiterals.types +++ b/testdata/baselines/reference/compiler/unicodeSurrogatesInStringLiterals.types @@ -9,31 +9,31 @@ export const highLow = "\ud83d\ude03" as 
const; // high surrogate export const high = "\ud83d" as const; ->high : "�" ->"\ud83d" as const : "�" ->"\ud83d" : "�" +>high : "\uD83D" +>"\ud83d" as const : "\uD83D" +>"\ud83d" : "\uD83D" // low surrogate export const low = "\ude03" as const; ->low : "�" ->"\ude03" as const : "�" ->"\ude03" : "�" +>low : "\uDE03" +>"\ude03" as const : "\uDE03" +>"\ude03" : "\uDE03" // two high surrogates export const highHigh = "\ud83d\ud83d" as const; ->highHigh : "��" ->"\ud83d\ud83d" as const : "��" ->"\ud83d\ud83d" : "��" +>highHigh : "\uD83D\uD83D" +>"\ud83d\ud83d" as const : "\uD83D\uD83D" +>"\ud83d\ud83d" : "\uD83D\uD83D" // two low surrogates export const lowLow = "\ude03\ude03" as const; ->lowLow : "��" ->"\ude03\ude03" as const : "��" ->"\ude03\ude03" : "��" +>lowLow : "\uDE03\uDE03" +>"\ude03\ude03" as const : "\uDE03\uDE03" +>"\ude03\ude03" : "\uDE03\uDE03" // swapped expected order of surrogates export const lowHigh = "\ude03\ud83d" as const; ->lowHigh : "��" ->"\ude03\ud83d" as const : "��" ->"\ude03\ud83d" : "��" +>lowHigh : "\uDE03\uD83D" +>"\ude03\ud83d" as const : "\uDE03\uD83D" +>"\ude03\ud83d" : "\uDE03\uD83D" diff --git a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings10(target=es6).types b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings10(target=es6).types index a30aea55871..787e81e870e 100644 --- a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings10(target=es6).types +++ b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings10(target=es6).types @@ -7,5 +7,5 @@ // this is a useful edge-case test. 
var x = "\u{D800}"; >x : string ->"\u{D800}" : "�" +>"\u{D800}" : "\uD800" diff --git a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings10(target=es6).types.diff b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings10(target=es6).types.diff new file mode 100644 index 00000000000..773d7df16ab --- /dev/null +++ b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings10(target=es6).types.diff @@ -0,0 +1,8 @@ +--- old.unicodeExtendedEscapesInStrings10(target=es6).types ++++ new.unicodeExtendedEscapesInStrings10(target=es6).types +@@= skipped -6, +6 lines =@@ + // this is a useful edge-case test. + var x = "\u{D800}"; + >x : string +->"\u{D800}" : "�" ++>"\u{D800}" : "\uD800" diff --git a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings11(target=es6).types b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings11(target=es6).types index 3dca2fcedbc..d86a0b622ce 100644 --- a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings11(target=es6).types +++ b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings11(target=es6).types @@ -7,5 +7,5 @@ // this is a useful edge-case test. var x = "\u{DC00}"; >x : string ->"\u{DC00}" : "�" +>"\u{DC00}" : "\uDC00" diff --git a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings11(target=es6).types.diff b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings11(target=es6).types.diff new file mode 100644 index 00000000000..cb029e4f663 --- /dev/null +++ b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInStrings11(target=es6).types.diff @@ -0,0 +1,8 @@ +--- old.unicodeExtendedEscapesInStrings11(target=es6).types ++++ new.unicodeExtendedEscapesInStrings11(target=es6).types +@@= skipped -6, +6 lines =@@ + // this is a useful edge-case test. 
+ var x = "\u{DC00}"; + >x : string +->"\u{DC00}" : "�" ++>"\u{DC00}" : "\uDC00" diff --git a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates10(target=es6).types b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates10(target=es6).types index 24cb2b3e476..9c0bbc0d428 100644 --- a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates10(target=es6).types +++ b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates10(target=es6).types @@ -7,5 +7,5 @@ // this is a useful edge-case test. var x = `\u{D800}`; >x : string ->`\u{D800}` : "�" +>`\u{D800}` : "\uD800" diff --git a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates10(target=es6).types.diff b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates10(target=es6).types.diff new file mode 100644 index 00000000000..8c03ba46c34 --- /dev/null +++ b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates10(target=es6).types.diff @@ -0,0 +1,8 @@ +--- old.unicodeExtendedEscapesInTemplates10(target=es6).types ++++ new.unicodeExtendedEscapesInTemplates10(target=es6).types +@@= skipped -6, +6 lines =@@ + // this is a useful edge-case test. + var x = `\u{D800}`; + >x : string +->`\u{D800}` : "�" ++>`\u{D800}` : "\uD800" diff --git a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates11(target=es6).types b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates11(target=es6).types index ba2535fe4e0..1a925f4dd86 100644 --- a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates11(target=es6).types +++ b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates11(target=es6).types @@ -7,5 +7,5 @@ // this is a useful edge-case test. 
var x = `\u{DC00}`; >x : string ->`\u{DC00}` : "�" +>`\u{DC00}` : "\uDC00" diff --git a/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates11(target=es6).types.diff b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates11(target=es6).types.diff new file mode 100644 index 00000000000..b0b5842acbb --- /dev/null +++ b/testdata/baselines/reference/submodule/conformance/unicodeExtendedEscapesInTemplates11(target=es6).types.diff @@ -0,0 +1,8 @@ +--- old.unicodeExtendedEscapesInTemplates11(target=es6).types ++++ new.unicodeExtendedEscapesInTemplates11(target=es6).types +@@= skipped -6, +6 lines =@@ + // this is a useful edge-case test. + var x = `\u{DC00}`; + >x : string +->`\u{DC00}` : "�" ++>`\u{DC00}` : "\uDC00" diff --git a/testdata/tests/cases/compiler/loneSurrogateStringLiterals.ts b/testdata/tests/cases/compiler/loneSurrogateStringLiterals.ts new file mode 100644 index 00000000000..29ff00d6e99 --- /dev/null +++ b/testdata/tests/cases/compiler/loneSurrogateStringLiterals.ts @@ -0,0 +1,29 @@ +// @strict: true + +// Lone surrogates should be distinct string literal types +const highSurrogate: "\uD800" = "\uD800"; // ok +const lowSurrogate: "\uDC00" = "\uDC00"; // ok + +// These should be errors - different surrogates are not assignable to each other +const highToLow: "\uD800" = "\uDC00"; // error +const lowToHigh: "\uDC00" = "\uD800"; // error + +// Different high surrogates should also be distinct +const high1: "\uD800" = "\uD801"; // error +const high2: "\uD801" = "\uD800"; // error + +// Different low surrogates should also be distinct +const low1: "\uDC00" = "\uDC01"; // error +const low2: "\uDC01" = "\uDC00"; // error + +// Extended Unicode escape syntax should also work +const extHigh: "\u{D800}" = "\u{D800}"; // ok +const extLow: "\u{DC00}" = "\u{DC00}"; // ok +const extHighToLow: "\u{D800}" = "\u{DC00}"; // error +const extLowToHigh: "\u{DC00}" = "\u{D800}"; // error + +// Mixed syntax should 
also be equivalent +const mixedHigh: "\uD800" = "\u{D800}"; // ok +const mixedLow: "\u{DC00}" = "\uDC00"; // ok +const mixedError1: "\uD800" = "\u{DC00}"; // error +const mixedError2: "\u{D800}" = "\uDC00"; // error