Skip to content

Commit 444fdad

Browse files
committed
Add new safeDecodeURIComponent function
1 parent 7afa766 commit 444fdad

File tree

2 files changed

+344
-5
lines changed

2 files changed

+344
-5
lines changed

library/helpers/safeDecodeURIComponent.test.ts

Lines changed: 228 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,22 @@
11
import * as t from "tap";
22
import { safeDecodeURIComponent } from "./safeDecodeURIComponent";
33

4-
// Set the timeout of the root tap test to 60000 ms.
t.setTimeout(60000);

// Fixed inputs with hand-written expectations.
t.test("it decodes a URI component (static tests)", async (t) => {
  // Percent-encoded literals paired with the text they must decode to.
  const literalCases: [string, string][] = [
    ["%20", " "],
    ["%3A", ":"],
    ["%2F", "/"],
    ["%252F", "%2F"],
    ["test%20test", "test test"],
    ["test%3Atest", "test:test"],
  ];
  for (const [encoded, expected] of literalCases) {
    t.equal(safeDecodeURIComponent(encoded), expected);
  }

  // Non-ASCII text must survive an encodeURIComponent round trip unchanged.
  const roundTripCases = ["✨", "💜", "اللغة", "Γλώσσα", "言語", "语言", "語言"];
  for (const text of roundTripCases) {
    t.equal(safeDecodeURIComponent(encodeURIComponent(text)), text);
  }
});
1121

1222
t.test("it returns undefined for invalid URI components", async (t) => {
@@ -16,4 +26,221 @@ t.test("it returns undefined for invalid URI components", async (t) => {
1626
t.equal(safeDecodeURIComponent("%2g"), undefined);
1727
t.equal(safeDecodeURIComponent("test%gtest"), undefined);
1828
t.equal(safeDecodeURIComponent("test%test"), undefined);
29+
t.equal(safeDecodeURIComponent("%99"), undefined);
30+
});
31+
32+
/**
 * Builds a random string from ASCII letters, digits and the characters
 * `.-_~%+`, so the result may or may not be a valid percent-encoded value.
 *
 * @param length Number of characters to generate. Defaults to a random
 *   integer between 1 and 100 (inclusive).
 * @returns The generated string.
 */
function generateRandomTestString(
  length = Math.floor(Math.random() * 100) + 1
) {
  const alphabet =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-_~%+";
  const picked: string[] = [];
  // One random draw per output character.
  while (picked.length < length) {
    picked.push(alphabet.charAt(Math.floor(Math.random() * alphabet.length)));
  }
  return picked.join("");
}
43+
44+
// Hand-picked inputs that are fed to both safeDecodeURIComponent and the
// built-in decodeURIComponent. The list mixes plain text, valid
// percent-encoded UTF-8, and malformed or truncated escapes.
// Each input appears exactly once.
const testCases = [
  "test",
  "42",
  "a+b+c+d",
  "=a",
  "%25",
  "%%25%%",
  "st%C3%A5le",
  "st%C3%A5le%",
  "%st%C3%A5le%",
  "%%7Bst%C3%A5le%7D%",
  "%ab%C3%A5le%",
  "%C3%A5%able%",
  "%7B%ab%7C%de%7D",
  "%7B%ab%%7C%de%%7D",
  "%7 B%ab%%7C%de%%7 D",
  "%61+%4d%4D",
  "\uFEFFtest",
  "\uFEFF",
  "%EF%BB%BFtest",
  "%EF%BB%BF",
  "†",
  "%C2%B5",
  "%C2%B5%",
  "%%C2%B5%",
  "%ab",
  "%ab%ab%ab",
  "%",
  "%2",
  "%E0%A4%A",
  '/test/hel%"Flo',
  "/test/hel%2Flo",
  "%E8%AF%AD%E8%A8%80",
  "%2G",
  "%2g",
  "%2g%2g",
  "test%2gtest",
  "test%test",
  "%99",
];
86+
87+
// How many random strings are generated in addition to the fixed cases.
const randomTestCases = 10_000;

// Random inputs, each with the generator's default (random) length.
const randomStrings: string[] = [];
for (let i = 0; i < randomTestCases; i++) {
  randomStrings.push(generateRandomTestString());
}

// Fixed cases first, followed by the random ones.
const allTestCases = testCases.concat(randomStrings);
93+
94+
// Every input must yield exactly what the built-in decodeURIComponent yields,
// with a thrown error mapped to `undefined`.
t.test("compare with original decodeURIComponent", async (t) => {
  // Reference result: the native decoder, with a throw turned into undefined.
  const nativeDecode = (input: string): string | undefined => {
    try {
      return decodeURIComponent(input);
    } catch {
      return undefined;
    }
  };

  allTestCases.forEach((input) => {
    const expected = nativeDecode(input);
    t.equal(safeDecodeURIComponent(input), expected);
  });
});
105+
106+
// Times the built-in decodeURIComponent (wrapped in try/catch) against
// safeDecodeURIComponent over the same inputs and asserts on the ratio.
// NOTE(review): both assertions depend on wall-clock measurements taken in a
// single pass, so they may be sensitive to machine load — confirm they are
// stable on CI.
t.test("benchmark", async (t) => {
  // Baseline: native decoder; errors are swallowed so every input is timed.
  const startOrig = performance.now();
  for (const testCase of allTestCases) {
    try {
      decodeURIComponent(testCase);
    } catch {
      //
    }
  }
  const endOrig = performance.now();

  // Candidate: the safe decoder needs no try/catch.
  const startSafe = performance.now();
  for (const testCase of allTestCases) {
    safeDecodeURIComponent(testCase);
  }
  const endSafe = performance.now();

  const origDuration = endOrig - startOrig;
  const safeDuration = endSafe - startSafe;
  t.ok(
    safeDuration < origDuration,
    `safeDecodeURIComponent is faster than decodeURIComponent`
  );

  // Fraction of the baseline time that was saved (0.7 means 70% less time).
  const origSpeedup = (origDuration - safeDuration) / origDuration;
  t.ok(
    origSpeedup > 0.7,
    `safeDecodeURIComponent is at least 70% faster than decodeURIComponent`
  );
  t.comment(`Performance improvement: ${origSpeedup * 100}%`);
});
137+
138+
// The following tests are ported from test262, the Official ECMAScript Conformance Test Suite
139+
// https://github.com/tc39/test262
140+
// Licensed under the MIT License
141+
// Copyright (C) 2012 Ecma International
142+
143+
// "%C0%" followed by two identical non-hex code units must be rejected.
t.test("S15.1.3.2_A1.10_T1", async (t) => {
  // Inclusive code unit ranges covering 0x0000-0xffff except 0-9, A-F and a-f.
  const nonHexRanges = [
    [0x00, 0x2f],
    [0x3a, 0x40],
    [0x47, 0x60],
    [0x67, 0xffff],
  ];
  for (const [first, last] of nonHexRanges) {
    for (let unit = first; unit <= last; unit++) {
      const invalidDigits = String.fromCharCode(unit, unit);
      t.equal(safeDecodeURIComponent("%C0%" + invalidDigits), undefined);
    }
  }
});
163+
164+
// "%E0%" + two identical non-hex code units + "%A0" must be rejected.
t.test("S15.1.3.2_A1.11_T1", async (t) => {
  // Inclusive code unit ranges covering 0x0000-0xffff except 0-9, A-F and a-f.
  const nonHexRanges = [
    [0x00, 0x2f],
    [0x3a, 0x40],
    [0x47, 0x60],
    [0x67, 0xffff],
  ];
  for (const [first, last] of nonHexRanges) {
    for (let unit = first; unit <= last; unit++) {
      const invalidDigits = String.fromCharCode(unit, unit);
      const input = "%E0%" + invalidDigits + "%A0";
      t.equal(safeDecodeURIComponent(input), undefined);
    }
  }
});
186+
187+
// A "%" that is not followed by two hex digits must be rejected.
t.test("S15.1.3.2_A1.1_T1", async (t) => {
  for (const truncated of ["%", "%A", "%1", "% "]) {
    t.equal(safeDecodeURIComponent(truncated), undefined);
  }
});
193+
194+
// Single escapes with upper-case hex digits decode to the matching character.
t.test("S15.1.3.2_A3_T1", async (t) => {
  const upperCaseEscapes: [string, string][] = [
    ["%3B", ";"],
    ["%2F", "/"],
    ["%3F", "?"],
    ["%3A", ":"],
    ["%40", "@"],
    ["%26", "&"],
    ["%3D", "="],
    ["%2B", "+"],
    ["%24", "$"],
    ["%2C", ","],
    ["%23", "#"],
  ];
  for (const [escape, character] of upperCaseEscapes) {
    t.equal(safeDecodeURIComponent(escape), character);
  }
});
207+
208+
// Single escapes with lower-case hex digits decode to the matching character.
t.test("S15.1.3.2_A3_T2", async (t) => {
  const lowerCaseEscapes: [string, string][] = [
    ["%3b", ";"],
    ["%2f", "/"],
    ["%3f", "?"],
    ["%3a", ":"],
    ["%40", "@"],
    ["%26", "&"],
    ["%3d", "="],
    ["%2b", "+"],
    ["%24", "$"],
    ["%2c", ","],
    ["%23", "#"],
  ];
  for (const [escape, character] of lowerCaseEscapes) {
    t.equal(safeDecodeURIComponent(escape), character);
  }
});
221+
222+
// The same escapes concatenated, in upper-case and in lower-case hex.
t.test("S15.1.3.2_A3_T3", async (t) => {
  const expected = ";/?:@&=+$,#";
  const encodedVariants = [
    "%3B%2F%3F%3A%40%26%3D%2B%24%2C%23",
    "%3b%2f%3f%3a%40%26%3d%2b%24%2c%23",
  ];
  for (const encoded of encodedVariants) {
    t.equal(safeDecodeURIComponent(encoded), expected);
  }
});
232+
233+
// Escaped ASCII letters decode to the upper-case and lower-case alphabets.
t.test("S15.1.3.2_A4_T1", async (t) => {
  const encodedUpper =
    "%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F%50%51%52%53%54%55%56%57%58%59%5A";
  t.equal(safeDecodeURIComponent(encodedUpper), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");

  const encodedLower =
    "%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F%70%71%72%73%74%75%76%77%78%79%7A";
  t.equal(safeDecodeURIComponent(encodedLower), "abcdefghijklmnopqrstuvwxyz");
});
Lines changed: 116 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,119 @@
1+
// Based on https://github.com/delvedor/fast-decode-uri-component
2+
// Licensed under the MIT License
3+
// Copyright (c) 2018 Tomas Della Vedova
4+
// Copyright (c) 2017 Justin Ridgewell
5+
// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
6+
7+
// Decoder state in which a complete code point has been read. It is also the
// state decoding starts in.
const UTF8_ACCEPT = 12;
// Decoder state reached when the byte sequence cannot be valid.
const UTF8_REJECT = 0;
// Single flat table with three consecutive sections, addressed by
// safeDecodeURIComponent at offsets 0, 256 and 364.
const UTF8_DATA: readonly number[] = [
  // The first part of the table maps each byte value (0-255) to a transition
  // type.
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 8, 7, 7, 10, 9, 9, 9, 11, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,

  // The second part of the table (offset 256) maps a state plus a transition
  // type to the next state.
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0, 24, 36, 48, 60, 72, 84,
  96, 0, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 24, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 48, 48, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 48, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

  // The third part (offset 364) maps the current transition type to the mask
  // that is applied to the byte before it is merged into the code point.
  0x7f, 0x3f, 0x3f, 0x3f, 0x00, 0x1f, 0x0f, 0x0f, 0x0f, 0x07, 0x07, 0x07,
];

// Value of each hexadecimal digit character, in either case. Any other key
// yields undefined, which hexCodeToInt treats as "not a hex digit".
const HEX: Readonly<Record<string, number | undefined>> = {
  "0": 0,
  "1": 1,
  "2": 2,
  "3": 3,
  "4": 4,
  "5": 5,
  "6": 6,
  "7": 7,
  "8": 8,
  "9": 9,
  a: 10,
  A: 10,
  b: 11,
  B: 11,
  c: 12,
  C: 12,
  d: 13,
  D: 13,
  e: 14,
  E: 14,
  f: 15,
  F: 15,
};

/**
 * Decodes a URI component, returning undefined if the URI is invalid.
 * This function is a safe alternative to `decodeURIComponent`, which throws
 * an error if the URI is malformed.
 *
 * @param uri The percent-encoded string to decode.
 * @returns The decoded string, the input itself when it contains no `%`, or
 *   `undefined` when an escape is malformed or the escaped bytes are rejected
 *   by the UTF-8 state table.
 */
export function safeDecodeURIComponent(uri: string): string | undefined {
  let percentPosition = uri.indexOf("%");
  if (percentPosition === -1) {
    // Uri does not contain any percent signs, so it is already decoded.
    return uri;
  }

  const length = uri.length;
  let decoded = "";
  // Index just past the last escape sequence that has been emitted.
  let last = 0;
  let codepoint = 0;
  // Index of the "%" that starts the multi-byte sequence being decoded.
  let startOfOctets = percentPosition;
  let state = UTF8_ACCEPT;

  while (percentPosition > -1 && percentPosition < length) {
    // Reading past the end of the string yields undefined, which
    // hexCodeToInt maps to 255; either nibble being invalid makes byte 255.
    const high = hexCodeToInt(uri[percentPosition + 1], 4);
    const low = hexCodeToInt(uri[percentPosition + 2], 0);
    const byte = high | low;
    const type = UTF8_DATA[byte];
    state = UTF8_DATA[256 + state + type];
    codepoint = (codepoint << 6) | (byte & UTF8_DATA[364 + type]);

    if (state === UTF8_ACCEPT) {
      // Copy the literal text that precedes this escape sequence.
      decoded += uri.slice(last, startOfOctets);

      // Code points above 0xffff are emitted as a UTF-16 surrogate pair.
      decoded +=
        codepoint <= 0xffff
          ? String.fromCharCode(codepoint)
          : String.fromCharCode(
              0xd7c0 + (codepoint >> 10),
              0xdc00 + (codepoint & 0x3ff)
            );

      codepoint = 0;
      last = percentPosition + 3;
      percentPosition = startOfOctets = uri.indexOf("%", last);
    } else if (state === UTF8_REJECT) {
      return undefined;
    } else {
      // Mid-sequence: the next escape must follow immediately
      // (char code 37 is "%").
      percentPosition += 3;
      if (percentPosition < length && uri.charCodeAt(percentPosition) === 37) {
        continue;
      }

      return undefined;
    }
  }

  return decoded + uri.slice(last);
}

/**
 * Converts one hexadecimal digit character to its value shifted left by
 * `shift` bits.
 *
 * @param c The character to convert; `undefined` when the caller read past
 *   the end of the string.
 * @param shift Number of bits to shift the digit value left by.
 * @returns The shifted value, or 255 when `c` is missing or has no entry in
 *   `HEX`.
 */
function hexCodeToInt(c: string | undefined, shift: number): number {
  const value = c === undefined ? undefined : HEX[c];
  return value === undefined ? 255 : value << shift;
}

0 commit comments

Comments
 (0)