Skip to content

Commit 7c27bd4

Browse files
committed
utf8: throw when encoding lone surrogates
Closes #81
1 parent 99bee56 commit 7c27bd4

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

packages/utf8/utf8.test.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,4 +119,30 @@ describe("utf8", () => {
119119
const dec = decode(enc);
120120
expect(dec).toEqual(s);
121121
});
122+
123+
it("should reject invalid UTF-16 strings with unpaired surrogates", () => {
124+
// High surrogate without low surrogate
125+
expect(() => encode('\ud800')).toThrowError(/invalid string/);
126+
expect(() => encode('\udbff')).toThrowError(/invalid string/);
127+
128+
// Low surrogate without high surrogate
129+
expect(() => encode('\udc00')).toThrowError(/invalid string/);
130+
expect(() => encode('\udfff')).toThrowError(/invalid string/);
131+
132+
// High surrogate at the end of string
133+
expect(() => encode('hello\ud800')).toThrowError(/invalid string/);
134+
135+
// Low surrogate at the beginning
136+
expect(() => encode('\udc00world')).toThrowError(/invalid string/);
137+
138+
// Two high surrogates in a row (the first one is unpaired; the second pairs with \udc00)
139+
expect(() => encode('\ud800\ud800\udc00')).toThrowError(/invalid string/);
140+
141+
// Low surrogate followed by high surrogate (wrong order)
142+
expect(() => encode('\udc00\ud800')).toThrowError(/invalid string/);
143+
144+
// Valid surrogate pair should work
145+
expect(() => encode('\ud800\udc00')).not.toThrow(); // U+10000
146+
expect(() => encode('\udbff\udfff')).not.toThrow(); // U+10FFFF
147+
});
122148
});

packages/utf8/utf8.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ export function encodedLength(s: string): number {
5353
let c = s.charCodeAt(i);
5454

5555
if (c >= 0xd800 && c <= 0xdbff) {
56-
// surrogate pair
56+
// High surrogate, must be followed by low surrogate.
5757
if (i === s.length - 1) {
5858
throw new Error(INVALID_UTF16);
5959
}
@@ -63,6 +63,9 @@ export function encodedLength(s: string): number {
6363
throw new Error(INVALID_UTF16);
6464
}
6565
c = ((c - 0xd800) << 10) + (c2 - 0xdc00) + 0x10000;
66+
} else if (c >= 0xdc00 && c <= 0xdfff) {
67+
// Low surrogate without preceding high surrogate.
68+
throw new Error(INVALID_UTF16);
6669
}
6770

6871
if (c < 0x80) {

0 commit comments

Comments
 (0)