Skip to content

Commit de4b5d6

Browse files
authored
Fix malformed UTF-8 sequences causing router crashes (#56562)
1 parent 9ac56e8 commit de4b5d6

File tree

3 files changed

+105
-0
lines changed

3 files changed

+105
-0
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import type { Response, NextFunction } from 'express'
2+
3+
import { defaultCacheControl } from '@/frame/middleware/cache-control'
4+
import { ExtendedRequest } from '@/types'
5+
6+
/**
 * Shielding middleware that rejects requests whose URL cannot be
 * percent-decoded.
 *
 * The raw request URL is passed through `decodeURIComponent`. If that call
 * throws (for example on an invalid URL-encoded sequence such as `%FF`), the
 * request is answered right here with a plain-text 400 instead of being
 * handed on, so the same decoding failure cannot crash the router later.
 *
 * @param req - Incoming request; `originalUrl` is inspected, falling back to `url`.
 * @param res - Response used to send the 400 when the URL is malformed.
 * @param next - Called to continue the middleware chain when the URL decodes cleanly.
 */
export default function handleMalformedUrls(
  req: ExtendedRequest,
  res: Response,
  next: NextFunction,
) {
  // Use the URL exactly as it was requested; `req.url` is only a fallback
  // for the case where `originalUrl` is empty.
  const rawUrl = req.originalUrl || req.url

  // Only the success/failure of the decode matters, not its result.
  let decodable = true
  try {
    decodeURIComponent(rawUrl)
  } catch {
    decodable = false
  }

  if (!decodable) {
    // Malformed URL: apply the default cache headers and reply with a 400.
    defaultCacheControl(res)
    res.setHeader('content-type', 'text/plain')
    res.status(400).send('Bad Request: Malformed URL')
    return
  }

  return next()
}

src/shielding/middleware/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import express from 'express'
22

3+
import handleMalformedUrls from './handle-malformed-urls'
34
import handleInvalidQuerystrings from './handle-invalid-query-strings'
45
import handleInvalidPaths from './handle-invalid-paths'
56
import handleOldNextDataPaths from './handle-old-next-data-paths'
@@ -9,6 +10,7 @@ import handleInvalidHeaders from './handle-invalid-headers'
910

1011
const router = express.Router()
1112

13+
router.use(handleMalformedUrls)
1214
router.use(handleInvalidQuerystrings)
1315
router.use(handleInvalidPaths)
1416
router.use(handleOldNextDataPaths)

src/shielding/tests/malformed-urls.ts

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import { describe, expect, test } from 'vitest'
2+
import { get } from '@/tests/helpers/e2etest'
3+
4+
describe('malformed URLs', () => {
  // Body text the middleware sends with every malformed-URL rejection.
  const rejectionBody = 'Bad Request: Malformed URL'

  // Shared assertions for a response rejected as a malformed URL:
  // a 400 status, a plain-text content type, and the fixed rejection body.
  function expectMalformedRejection(res: Awaited<ReturnType<typeof get>>) {
    expect(res.statusCode).toBe(400)
    expect(res.headers['content-type']).toMatch('text/plain')
    expect(res.body).toBe(rejectionBody)
  }

  test('blocks URLs with %FF sequences', async () => {
    const res = await get('/en/site-policy/other-site-policies/github-account-%FFqrlkuciqll-policy')

    expectMalformedRejection(res)
  })

  test('blocks URLs with %FE sequences', async () => {
    expectMalformedRejection(await get('/en/some-page-%FE-test'))
  })

  test('blocks URLs with overlong encoding %C0%80', async () => {
    expectMalformedRejection(await get('/en/test-%C0%80-page'))
  })

  test('blocks URLs with invalid UTF-8 continuation sequences', async () => {
    expectMalformedRejection(await get('/en/test-%80%80-page'))
  })

  test('allows URLs with control characters (valid UTF-8)', async () => {
    // Control characters like %01 are valid UTF-8 and don't cause decoding
    // errors, so the request gets through and 404s because the page doesn't exist.
    const res = await get('/en/test-%01-page')
    expect(res.statusCode).toBe(404)
  })

  test('allows valid URLs with proper encoding', async () => {
    // A plain, well-formed path must not be blocked by the malformed URL middleware.
    const res = await get('/en/get-started')
    expect(res.statusCode).not.toBe(400)
  })

  test('allows valid URLs with proper percent encoding', async () => {
    // A correctly percent-encoded query string must not be blocked either.
    const res = await get('/en/search?q=test%20query')
    expect(res.statusCode).not.toBe(400)
  })

  test('blocks malformed query parameters', async () => {
    // This is caught by checking originalUrl, which contains the raw, unparsed URL
    expectMalformedRejection(await get('/en/search?q=test%FF'))
  })

  test('properly caches malformed URL responses', async () => {
    const res = await get('/en/malformed-%FF-url')
    expect(res.statusCode).toBe(400)
    expect(res.headers['cache-control']).toBeDefined()
  })

  test('handles multiple malformed sequences', async () => {
    expectMalformedRejection(await get('/en/test-%FF%FE%80-page'))
  })
})

0 commit comments

Comments
 (0)