Skip to content

Commit 8cbfbc3

Browse files
committed
feat: Add convertBody, extractContentType and getTotalBytes
Signed-off-by: Richie Bendall <[email protected]>
1 parent fa89c43 commit 8cbfbc3

File tree

8 files changed

+345
-50
lines changed

8 files changed

+345
-50
lines changed

.gitignore

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,12 @@ typings/
103103
# Optional eslint cache
104104
.eslintcache
105105

106+
# Microbundle cache
107+
.rpt2_cache/
108+
.rts2_cache_cjs/
109+
.rts2_cache_es/
110+
.rts2_cache_umd/
111+
106112
# Optional REPL history
107113
.node_repl_history
108114

@@ -125,8 +131,9 @@ typings/
125131
# nuxt.js build output
126132
.nuxt
127133

128-
# react / gatsby
129-
public/
134+
# gatsby files
135+
.cache/
136+
public
130137

131138
# vuepress build output
132139
.vuepress/dist

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2019 Richie Bendall
3+
Copyright (c) 2016 - 2019 The Node Fetch Team
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,17 @@
1-
# Typescript QuickStart
1+
# Fetch Charset Detection [![Travis CI Build Status](https://img.shields.io/travis/com/Richienb/fetch-charset-detection/master.svg?style=for-the-badge)](https://travis-ci.com/Richienb/fetch-charset-detection)
22

3-
A quick start template for Typescript.
3+
Convert a buffer and headers to UTF-8 text, originally from `node-fetch`.
44

5-
[![Travis CI Build Status](https://img.shields.io/travis/com/Richienb/typescript-quickstart/master.svg?style=for-the-badge)](https://travis-ci.com/Richienb/typescript-quickstart)
6-
[![CodeFactor Score](https://www.codefactor.io/repository/github/Richienb/typescript-quickstart/badge?style=for-the-badge)](https://www.codefactor.io/repository/github/Richienb/typescript-quickstart)
7-
8-
[![NPM](https://nodei.co/npm/typescript-quickstart.png?downloads=true&downloadRank=true&stars=true)](https://nodei.co/npm/typescript-quickstart)
5+
[![NPM](https://nodei.co/npm/fetch-charset-detection.png?downloads=true&downloadRank=true&stars=true)](https://nodei.co/npm/fetch-charset-detection)
96

107
## Importing
118

129
From your NodeJS application:
1310

1411
```js
15-
const TypeScriptQuickStart = require("typescript-quickstart")
16-
```
17-
18-
From your web application:
19-
20-
```html
21-
<script src="https://unpkg.com/typescript-quickstart/dist/index.min.js"></script>
22-
```
23-
24-
## Initialisation
25-
26-
```js
27-
const TypeScriptQuickStart = new TypeScriptQuickStart()
12+
const charsetDetection = require("fetch-charset-detection");
2813
```
2914

3015
## Usage
3116

32-
Read the [documentation](https://richienb.github.io/typescript-quickstart).
33-
34-
> The typescript-quickstart npm module is not maintained by this project.
17+
Read the [documentation](https://richienb.github.io/fetch-charset-detection).

package.json

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,39 @@
11
{
2-
"name": "typescript-quickstart",
3-
"description": "A quick start template for Typescript.",
2+
"name": "fetch-charset-detection",
3+
"description": "Convert a buffer and headers to UTF-8 text, originally from node-fetch.",
44
"keywords": [
5-
"quickstart",
6-
"typescript",
7-
"template",
8-
"compiler",
5+
"content-type",
6+
"headers",
7+
"http",
98
"meta",
10-
"node"
9+
"node-fetch"
1110
],
1211
"version": "0.0.0",
1312
"main": "dist/index.js",
1413
"files": [
1514
"src/**/*",
1615
"dist/**/*"
1716
],
18-
"repository": "https://github.com/Richienb/typescript-quickstart.git",
17+
"repository": "https://github.com/Richienb/fetch-charset-detection.git",
1918
"author": "Richie Bendall <[email protected]>",
2019
"license": "MIT",
2120
"scripts": {
2221
"dev": "microbundle --target node --watch",
2322
"build": "yarn js && yarn docs",
24-
"js": "microbundle --target node",
25-
"docs": "typedoc --out ./docs --mode file --target ES6 ./src",
23+
"js": "microbundle --target node --format es,cjs --external http,https,stream,zlib,cheerio,content-type,iconv-lite",
24+
"docs": "typedoc --out ./docs --mode file --target ES6 --ignoreCompilerErrors ./src",
2625
"lint": "eslint src/*"
2726
},
27+
"dependencies": {
28+
"cheerio": "^1.0.0-rc.3",
29+
"content-type": "^1.0.4",
30+
"iconv-lite": "^0.5.0"
31+
},
2832
"devDependencies": {
33+
"@types/cheerio": "^0.22.13",
34+
"@types/content-type": "^1.1.3",
2935
"@typescript-eslint/eslint-plugin": "^2.3.1",
36+
"@typescript-eslint/parser": "^2.3.1",
3037
"eslint": "^6.4.0",
3138
"eslint-config-google": "^0.14.0",
3239
"microbundle": "^0.11.0",

src/index.ts

Lines changed: 123 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
*
44
* MIT License
55
*
6-
* Copyright (c) 2019 Richie Bendall
6+
* Copyright (c) 2016 - 2019 The Node Fetch Team
77
*
88
* Permission is hereby granted, free of charge, to any person obtaining a copy
99
* of this software and associated documentation files (the 'Software'), to deal
@@ -24,15 +24,129 @@
2424
* SOFTWARE.
2525
*/
2626

27+
import { decode as convert } from "iconv-lite"
28+
import getCharSet from "./utils/getCharSet"
29+
import { parse as $ } from "cheerio"
30+
import { isURLSearchParams, isBlob, isArrayBuffer } from "./utils/is"
31+
import { Stream, Writable } from "stream"
32+
2733
/**
 * Detect the character encoding of a buffer and decode it to a string.
 * ref: http://www.w3.org/TR/2011/WD-html5-20110113/parsing.html#determining-the-character-encoding
 *
 * The charset is resolved in this order:
 * 1. the `charset` parameter of the `content-type` header, when `headers` is given;
 * 2. a declaration found in the first 1024 bytes of the body (HTML5 `<meta charset>`,
 *    HTML4 `<meta http-equiv content>`, then the `encoding` of an XML declaration);
 * 3. `utf-8` as the fallback.
 *
 * @param buffer Incoming buffer.
 * @param headers Headers provided with the request.
 * @returns The text decoded from `buffer` using the resolved charset.
 */
export function convertBody(buffer: Buffer, headers?: Headers): string {
    // Duck-type the headers object: `instanceof Headers` throws a ReferenceError
    // in runtimes that do not define a global `Headers`.
    const contentType = headers != null && typeof headers.get === "function" ? headers.get("content-type") : null
    let charset: string | null | undefined = null

    // Header
    if (contentType) {
        charset = getCharSet(contentType)
    }

    // No charset in content type, peek at response body for at most 1024 bytes
    if (!charset) {
        const res = buffer.slice(0, 1024).toString()

        if (res) {
            // NOTE(review): `$` is the `parse` import from cheerio and is called here as
            // a document loader; confirm that export exists (cheerio documents `load`).
            charset =
                // HTML5: the attribute value is already a bare charset name
                $(res)("meta[charset]").attr("charset") ||
                // HTML4: the attribute value is a Content-Type string
                getCharSet($(res)("meta[http-equiv][content]").attr("content")) ||
                // XML: rewrite `<?xml ... ?>` into a normal tag so its attributes can be read
                $(res.replace(/<\?(.*)\?>/im, "<$1>"), { xmlMode: true }).root().find("xml").attr("encoding")
        }
    }

    // Prevent decode issues when sites use incorrect encoding
    // ref: https://hsivonen.fi/encoding-menu/
    if (charset) {
        // Compare by value; the `in` operator would test array indices instead.
        const normalized = charset.toLowerCase()
        if (normalized === "gb2312" || normalized === "gbk") {
            charset = "gb18030"
        }
    }

    // Turn raw buffers into a single utf-8 buffer
    return convert(buffer, charset || "utf-8")
}
73+
74+
/**
 * Performs the operation "extract a `Content-Type` value from |object|" as
 * specified in the specification:
 * https://fetch.spec.whatwg.org/#concept-bodyinit-extract
 *
 * @param body Any options.body input
 * @returns The Content-Type implied by the body, or `null` when the body is
 * absent or its type gives no indication (buffers, streams, untyped blobs).
 */
export function extractContentType(body: any): string | null {
    // Body is null or undefined - the checks below would otherwise dereference it
    if (body == null) return null

    // Body is string
    if (typeof body === "string") return "text/plain;charset=UTF-8"

    // Body is a URLSearchParams
    if (isURLSearchParams(body)) return "application/x-www-form-urlencoded;charset=UTF-8"

    // Body is blob - use its own type, if it declares one
    if (isBlob(body)) return body.type || null

    // Body is a Buffer (Buffer, ArrayBuffer or ArrayBufferView)
    if (Buffer.isBuffer(body) || isArrayBuffer(body) || ArrayBuffer.isView(body)) return null

    // Detect form data input from form-data module
    if (typeof body.getBoundary === "function") return `multipart/form-data;boundary=${body.getBoundary()}`

    // Body is stream - can't really do much about this
    if (body instanceof Stream) return null

    // Body constructor defaults other things to string
    return "text/plain;charset=UTF-8"
}
105+
106+
/**
 * Work out how many bytes a body will occupy, when that can be known up front.
 * The Fetch Standard models this as a "total bytes" property of the body; here
 * it has to be computed by inspecting the value.
 *
 * ref: https://fetch.spec.whatwg.org/#concept-body-total-bytes
 *
 * @param body Body object from the Body instance.
 * @returns `0` for a missing body, the size of a blob, the length of a buffer,
 * the synchronous length of a form-data body that reports a known length, and
 * `null` in every other case.
 */
export function getTotalBytes(body: any): number | null {
    // A missing body contributes nothing
    if (body === null || body === undefined) {
        return 0
    }

    // Blobs report their own size
    if (isBlob(body)) {
        return body.size
    }

    // Buffers report their own length
    if (Buffer.isBuffer(body)) {
        return body.length
    }

    // form-data module: only trust the length when the body says it is known
    if (body && typeof body.getLengthSync === "function") {
        const lengthIsKnown = body.hasKnownLength && body.hasKnownLength()
        return lengthIsKnown ? body.getLengthSync() : null
    }

    // Anything else (e.g. a stream) has no length known in advance
    return null
}
130+
131+
/**
 * Send a Body into a Node.js WritableStream (e.g. http.Request).
 *
 * A missing body simply ends the destination; a blob is piped through its
 * `stream()`; a buffer is written and the destination ended; anything else is
 * piped into the destination via its own `pipe` method.
 *
 * @param body Body object from the Body instance.
 * @param dest The stream to write to.
 */
export function writeToStream(body: any, dest: Writable): void {
    // No body: nothing to write, just finish the destination
    if (body == null) {
        dest.end()
        return
    }

    // Blob: pipe its stream into the destination
    if (isBlob(body)) {
        body.stream().pipe(dest)
        return
    }

    // Buffer: write it in one go, then finish
    if (Buffer.isBuffer(body)) {
        dest.write(body)
        dest.end()
        return
    }

    // Remaining case: the body is piped directly
    body.pipe(dest)
}

src/utils/getCharSet.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import { parse as parseContentType } from "content-type"
2+
3+
/**
 * Get the character set from a Content-Type header.
 *
 * @param contentType The Content-Type HTTP header.
 * @returns The value of the `charset` parameter, or `null` when the header is
 * missing, cannot be parsed as a media type, or carries no charset.
 */
export default function getCharSet(contentType: string): string | null {
    if (contentType == null) return null

    try {
        // Normalise a missing parameter (`undefined`) to `null`, as declared.
        return parseContentType(contentType).parameters.charset || null
    } catch {
        // The parser rejects strings that are not valid media types; for charset
        // detection that simply means "no charset found".
        return null
    }
}

src/utils/is.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Well-known `Symbol.toStringTag` key; the type checks in this file compare the
// value stored under it against the expected class name.
const NAME = Symbol.toStringTag;
2+
3+
/**
 * Check if `obj` is a URLSearchParams object
 * ref: https://github.com/bitinn/node-fetch/issues/296#issuecomment-307598143
 *
 * The check is duck-typed: it looks for the URLSearchParams methods and the
 * `Symbol.toStringTag` value instead of using `instanceof`.
 *
 * @param obj The object to check.
 * @returns `true` when `obj` looks like a URLSearchParams; `false` otherwise,
 * including for `null` and non-objects.
 */
export function isURLSearchParams(obj: any): boolean {
    return (
        typeof obj === 'object' &&
        // `typeof null` is also 'object'; reject it before reading properties.
        obj !== null &&
        typeof obj.append === 'function' &&
        typeof obj.delete === 'function' &&
        typeof obj.get === 'function' &&
        typeof obj.getAll === 'function' &&
        typeof obj.has === 'function' &&
        typeof obj.set === 'function' &&
        typeof obj.sort === 'function' &&
        obj[NAME] === 'URLSearchParams'
    );
}
22+
23+
/**
 * Check if `obj` is a W3C `Blob` object (which `File` inherits from)
 *
 * The check is duck-typed: it requires `arrayBuffer()` and `stream()` methods,
 * a string `type`, and a `Symbol.toStringTag` of `Blob` or `File`.
 *
 * @param obj The object to check.
 * @returns `true` when `obj` looks like a Blob or File; `false` otherwise,
 * including for `null` and non-objects.
 */
export function isBlob(obj: any): boolean {
    return (
        typeof obj === 'object' &&
        // `typeof null` is also 'object'; reject it before reading properties.
        obj !== null &&
        typeof obj.arrayBuffer === 'function' &&
        typeof obj.type === 'string' &&
        typeof obj.stream === 'function' &&
        typeof obj.constructor === 'function' &&
        /^(Blob|File)$/.test(obj[NAME])
    );
}
38+
39+
/**
 * Check if `obj` is an instance of AbortSignal.
 *
 * The check compares the object's `Symbol.toStringTag` with `AbortSignal`
 * rather than using `instanceof`.
 *
 * @param obj The object to check.
 * @returns `true` when `obj` is an object tagged `AbortSignal`; `false`
 * otherwise, including for `null` and non-objects.
 */
export function isAbortSignal(obj: any): boolean {
    return (
        typeof obj === 'object' &&
        // `typeof null` is also 'object'; reject it before reading properties.
        obj !== null &&
        obj[NAME] === 'AbortSignal'
    );
}
50+
51+
/**
 * Check if `obj` is an instance of ArrayBuffer.
 *
 * The check compares the value's `Symbol.toStringTag` with `ArrayBuffer`
 * rather than using `instanceof`.
 *
 * @param obj The object to check.
 * @returns `true` when `obj` is tagged `ArrayBuffer`; `false` otherwise,
 * including for `null` and `undefined`.
 */
export function isArrayBuffer(obj: any): boolean {
    // Guard first: property access on null/undefined throws.
    return obj != null && obj[NAME] === 'ArrayBuffer';
}
59+
60+
/**
 * Check if `obj` is an instance of AbortError.
 *
 * The check looks only at the `name` property, so any value whose `name` is
 * `AbortError` qualifies.
 *
 * @param obj The object to check.
 * @returns `true` when `obj.name` is `AbortError`; `false` otherwise,
 * including for `null` and `undefined`.
 */
export function isAbortError(obj: any): boolean {
    // Guard first: thrown values may be null/undefined, and reading `.name` on them throws.
    return obj != null && obj.name === 'AbortError';
}

0 commit comments

Comments
 (0)