node-fetch
diff --git a/‎README.md
Lines changed: 2 additions & 7 deletions b/‎README.md
Lines changed: 2 additions & 7 deletions
diff --git a/‎package.json
Lines changed: 4 additions & 9 deletions b/‎package.json
Lines changed: 4 additions & 9 deletions
diff --git a/‎src/index.ts
Lines changed: 48 additions & 4 deletions b/‎src/index.ts
Lines changed: 48 additions & 4 deletions
diff --git a/‎src/lib/convert-body.ts
Lines changed: 0 additions & 41 deletions b/‎src/lib/convert-body.ts
Lines changed: 0 additions & 41 deletions
diff --git a/‎src/lib/extract-content-type.ts
Lines changed: 0 additions & 38 deletions b/‎src/lib/extract-content-type.ts
Lines changed: 0 additions & 38 deletions
diff --git a/‎src/lib/get-total-bytes.ts
Lines changed: 0 additions & 27 deletions b/‎src/lib/get-total-bytes.ts
Lines changed: 0 additions & 27 deletions
diff --git a/‎src/lib/write-to-stream.ts
Lines changed: 0 additions & 25 deletions b/‎src/lib/write-to-stream.ts
Lines changed: 0 additions & 25 deletions
diff --git a/‎src/utils/get-charset.ts
Lines changed: 1 addition & 0 deletions b/‎src/utils/get-charset.ts
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/utils/is.ts
Lines changed: 0 additions & 87 deletions b/‎src/utils/is.ts
Lines changed: 0 additions & 87 deletions
@@ -1,6 +1,6 @@
 # Fetch Charset Detection [![Travis CI Build Status](https://img.shields.io/travis/com/Richienb/fetch-charset-detection/master.svg?style=for-the-badge)](https://travis-ci.com/Richienb/fetch-charset-detection)
 
-Charset detection and conversion utilities, originally from `node-fetch`.
+Charset detection and conversion, originally from `node-fetch`.
 
 [![NPM](https://nodei.co/npm/fetch-charset-detection.png?downloads=true&downloadRank=true&stars=true)](https://nodei.co/npm/fetch-charset-detection)
 
@@ -9,12 +9,7 @@ Charset detection and conversion utilities, originally from `node-fetch`.
 From your Node.js application:
 
 ```js
-const {
-    convertBody,
-    extractContentType,
-    getTotalBytes,
-    writeToStream
-} = require("fetch-charset-detection");
+const convertBody = require("fetch-charset-detection");
 ```
 
 ## API
 
@@ -21,7 +21,7 @@
         "dev": "yarn js --watch",
         "build": "yarn js && yarn docs",
         "js": "tsc",
-        "docs": "typedoc --out ./docs --mode file --target ES6 --ignoreCompilerErrors ./src",
+        "docs": "typedoc",
         "lint": "xo",
         "test": "ava"
     },
@@ -37,23 +37,18 @@
         "@types/content-type": "^1.1.3",
         "@types/lodash": "^4.14.146",
         "ava": "^2.4.0",
-        "xo": "^0.25.3",
         "eslint-config-richienb": "^0.2.2",
-        "express": "^4.17.1",
-        "fetch-blob": "^1.0.4",
-        "form-data": "^3.0.0",
-        "get-port": "^5.0.0",
         "node-fetch": "^2.6.0",
-        "resumer": "^0.0.0",
         "ts-node": "^8.4.1",
         "typedoc": "^0.15.0",
-        "typescript": "^3.7.2"
+        "typescript": "^3.7.2",
+        "xo": "^0.25.3"
     },
     "resolutions": {
         "eslint": "^6.6.0"
     },
     "xo": {
-        "extends": "richienb/node",
+        "extends": "richienb/ts",
         "overrides": [
             {
                 "files": "test.js",
 
@@ -24,7 +24,51 @@
  * SOFTWARE.
  */
 
-export { convertBody } from "./lib/convert-body"
-export { extractContentType } from "./lib/extract-content-type"
-export { getTotalBytes } from "./lib/get-total-bytes"
-export { writeToStream } from "./lib/write-to-stream"
+import getCharset from "./utils/get-charset"
+import { decode } from "iconv-lite"
+import { load as $ } from "cheerio"
+import _ from "lodash"
+
+/**
+* Detect the encoding of a body and decode it to a string.
+* ref: http://www.w3.org/TR/2011/WD-html5-20110113/parsing.html#determining-the-character-encoding
+*
+* The charset is looked up in this order, stopping at the first source that yields one:
+* 1. the `content-type` header, when `headers` is provided;
+* 2. the `charset` attribute of a `<meta charset>` tag (HTML5);
+* 3. the `content` attribute of a `<meta http-equiv content>` tag (HTML4);
+* 4. the `encoding` attribute of an `<?xml ... ?>` declaration.
+* Sources 2-4 only look at the first 1024 bytes of the body. When no charset
+* is found the body is decoded as `utf-8`.
+*
+* @param content The content to convert. A string is converted to a buffer first.
+* @param headers HTTP Headers provided with a request.
+* @returns The content decoded using the detected charset.
+*/
+export default function convertBody(content: Buffer | string, headers?: Headers): string {
+    // Try to extract content-type header
+    const contentType = _.isNil(headers) ? null : headers.get("content-type")
+
+    // Resulting charset; stays null until one of the sources below yields a value
+    let charset: string | null = null
+
+    // Convert to buffer
+    if (_.isString(content)) content = Buffer.from(content)
+
+    // Header
+    if (contentType) charset = getCharset(contentType)
+
+    if (!charset) {
+        // No charset in content type, peek at response body for at most 1024 bytes
+        const res = _.toString(content.slice(0, 1024))
+
+        // HTML5, HTML4 and XML
+        if (res) {
+            // Parse the HTML once and reuse the document for both meta lookups
+            const html = $(res)
+            const declared =
+                html("meta[charset]").attr("charset") || // HTML5
+                html("meta[http-equiv][content]").attr("content") || // HTML4
+                // Rewrite `<?xml ... ?>` into a regular `<xml ...>` element so its attributes can be queried
+                $(_.replace(res, /<\?(.*)\?>/im, "<$1>"), { xmlMode: true }).root().find("xml").attr("encoding") // XML
+
+            if (declared) charset = getCharset(declared)
+        }
+    }
+
+    // Prevent decode issues when sites use incorrect encoding
+    // ref: https://hsivonen.fi/encoding-menu/
+    // `_.toLower` is used rather than `_.lowerCase`: the latter splits words and
+    // turns "gb2312" into "gb 2312", which would never match the list below.
+    if (charset && _.includes(["gb2312", "gbk"], _.toLower(charset))) charset = "gb18030"
+
+    // Decode the buffer using the detected charset, falling back to utf-8
+    return decode(
+        content,
+        charset || "utf-8",
+    )
+}
+
@@ -5,6 +5,7 @@ import niceTry from "nice-try"
 /**
  * Get the character set from a Content-Type header.
  * @param contentType The Content-Type HTTP header.
+ * @private
  */
 export default function getCharset(contentType: string): string | null {
     if (_.isNil(contentType)) return null