Skip to content

Commit cad9ae0

Browse files
committed
fix: Properly parse charset for specific parts
Signed-off-by: Richie Bendall <[email protected]>
1 parent f8f9de5 commit cad9ae0

File tree

5 files changed

+30 -9 lines changed

5 files changed

+30 -9 lines changed

package.json

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"scripts": {
2121
"dev": "yarn js --watch",
2222
"build": "yarn js && yarn docs",
23-
"js": "tsc --outDir dist/",
23+
"js": "tsc",
2424
"docs": "typedoc --out ./docs --mode file --target ES6 --ignoreCompilerErrors ./src",
2525
"lint": "eslint src/**/*",
2626
"test": "ava"
@@ -29,7 +29,8 @@
2929
"cheerio": "^1.0.0-rc.3",
3030
"content-type": "^1.0.4",
3131
"iconv-lite": "^0.5.0",
32-
"lodash": "^4.17.15"
32+
"lodash": "^4.17.15",
33+
"nice-try": "^2.0.0"
3334
},
3435
"devDependencies": {
3536
"@types/cheerio": "^0.22.13",
@@ -59,5 +60,8 @@
5960
"require": [
6061
"ts-node/register"
6162
]
63+
},
64+
"engines": {
65+
"node": ">=0.12"
6266
}
6367
}

src/lib/convert-body.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import getCharset from "../utils/get-charset"
2-
import { decode as convert } from "iconv-lite"
2+
import { decode } from "iconv-lite"
33
import { load as $ } from "cheerio"
44
import _ from "lodash"
55

@@ -31,10 +31,10 @@ export function convertBody(buffer: Buffer, headers?: Headers): string {
3131

3232
// Prevent decode issues when sites use incorrect encoding
3333
// ref: https://hsivonen.fi/encoding-menu/
34-
if (charset && _.lowerCase(charset) in ["gb2312", "gbk"]) charset = "gb18030"
34+
if (charset && _.includes(["gb2312", "gbk"], _.lowerCase(charset))) charset = "gb18030"
3535

3636
// Turn raw buffers into a single utf-8 buffer
37-
return convert(
37+
return decode(
3838
buffer,
3939
charset || "utf-8",
4040
)

src/utils/get-charset.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
import { parse as parseContentType } from "content-type"
22
import _ from "lodash"
3+
import niceTry from "nice-try"
34

45
/**
56
* Get the character set from a Content-Type header.
67
* @param contentType The Content-Type HTTP header.
78
*/
8-
export default function getCharSet(contentType: string): string | null {
9-
return !_.isNil(contentType) ? parseContentType(contentType).parameters.charset : null
9+
export default function getCharset(contentType: string): string | null {
10+
if (_.isNil(contentType)) return null
11+
12+
const parsed = niceTry(() => parseContentType(contentType))
13+
if (!_.isNil(parsed)) return parsed.parameters.charset
14+
else return contentType
1015
}

tsconfig.json

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
{
22
"compilerOptions": {
3-
"esModuleInterop": true
4-
}
3+
"esModuleInterop": true,
4+
"sourceMap": true,
5+
"declaration": true,
6+
"outDir": "dist/",
7+
"target": "es5"
8+
},
9+
"include": [
10+
"src/**/*"
11+
]
512
}

yarn.lock

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2887,6 +2887,11 @@ nice-try@^1.0.4:
28872887
resolved "https://registry.yarnpkg.com/nice-try/-/nice-try-1.0.5.tgz#a3378a7696ce7d223e88fc9b764bd7ef1089e366"
28882888
integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ==
28892889

2890+
nice-try@^2.0.0:
2891+
version "2.0.0"
2892+
resolved "https://registry.yarnpkg.com/nice-try/-/nice-try-2.0.0.tgz#0ccb36ce82be13528f0217334adcf870c8eb3356"
2893+
integrity sha512-/o4KoXbUkEjD2NDwvBCkSzEDOfMuHPmbzSD5Pp2Bb5U/69MnEvpc4YfW2Y1BJqJpcaam04+fR0kF8n5xm8b6MQ==
2894+
28902895
node-fetch@^2.6.0:
28912896
version "2.6.0"
28922897
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.0.tgz#e633456386d4aa55863f676a7ab0daa8fdecb0fd"

0 commit comments

Comments (0)