Skip to content

Commit 3bd2555

Browse files
authored
Revert encoding fix #482 (#495)
1 parent c6f60b8 commit 3bd2555

File tree

6 files changed

+15
-25
lines changed

6 files changed

+15
-25
lines changed

lib/config/defaults.js

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,8 @@ const config = {
4848
],
4949
request: {
5050
throwHttpErrors: false,
51-
responseType: 'buffer',
51+
encoding: 'binary',
52+
//cookieJar: true,
5253
decompress: true,
5354
headers: {
5455
'user-agent': defaultRequestUserAgent

lib/plugins/save-resource-to-fs-plugin.js

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,7 @@ class SaveResourceToFileSystemPlugin {
2020
registerAction('saveResource', async ({resource}) => {
2121
const filename = path.join(absoluteDirectoryPath, resource.getFilename());
2222
const text = resource.getText();
23-
const encoding = typeof text === 'string' ? 'utf-8' : 'binary';
24-
await fs.outputFile(filename, text, { encoding });
23+
await fs.outputFile(filename, text, { encoding: 'binary' });
2524
loadedResources.push(resource);
2625
});
2726

lib/request.js

Lines changed: 6 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,18 @@
11
import got from 'got';
22
import logger from './logger.js';
3-
import types from './config/resource-types.js';
4-
import { extend, isPlainObject, getTypeByMime } from './utils/index.js';
5-
6-
const TEXT_RESOURCE_TYPES = [types.html, types.css];
3+
import { extend, isPlainObject } from './utils/index.js';
74

85
function getMimeType (contentType) {
96
return contentType ? contentType.split(';')[0] : null;
107
}
118

12-
function defaultResponseHandler ({response, type}) {
13-
if (TEXT_RESOURCE_TYPES.includes(type)) {
14-
return response.body.toString();
15-
}
16-
return response.body;
9+
function defaultResponseHandler ({response}) {
10+
return Promise.resolve(response.body);
1711
}
1812

1913
function transformResult (result) {
2014
switch (true) {
21-
case typeof result === 'string' || Buffer.isBuffer(result):
15+
case typeof result === 'string':
2216
return {
2317
body: result,
2418
metadata: null
@@ -47,19 +41,14 @@ async function getRequest ({url, referer, options = {}, afterResponse = defaultR
4741

4842
const response = await got(requestOptions);
4943
logger.debug(`[request] received response for ${response.url}, statusCode ${response.statusCode}`);
50-
51-
const mimeType = getMimeType(response.headers['content-type']);
52-
const resourceType = getTypeByMime(mimeType);
53-
54-
const responseHandlerResult = transformResult(await afterResponse({ response, type: resourceType }));
44+
const responseHandlerResult = transformResult(await afterResponse({response}));
5545

5646
if (!responseHandlerResult) {
5747
return null;
5848
}
5949
return {
6050
url: response.url,
61-
type: resourceType,
62-
mimeType,
51+
mimeType: getMimeType(response.headers['content-type']),
6352
body: responseHandlerResult.body,
6453
metadata: responseHandlerResult.metadata
6554
};

lib/scraper.js

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ import {
1313
} from './plugins/index.js';
1414

1515
import * as utils from './utils/index.js';
16-
const { extend, union, urlsEqual, getTypeByFilename, series } = utils;
16+
const { extend, union, urlsEqual, getTypeByMime, getTypeByFilename, series } = utils;
1717
import NormalizedUrlMap from './utils/normalized-url-map.js';
1818

1919
const actionNames = [
@@ -170,7 +170,7 @@ class Scraper {
170170
self.requestedResourcePromises.set(responseData.url, requestPromise);
171171
}
172172

173-
resource.setType(responseData.type);
173+
resource.setType(getTypeByMime(responseData.mimeType));
174174

175175
const { filename } = await self.runActions('generateFilename', { resource, responseData });
176176
resource.setFilename(filename);

test/functional/encoding/hieroglyphs.test.js

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,8 @@ import scrape from 'website-scraper';
66
const testDirname = './test/functional/encoding/.tmp';
77
const mockDirname = './test/functional/encoding/mocks';
88

9-
describe('Functional: Korean characters are properly encoded/decoded', function() {
9+
// TODO: enable test when encoding issue is fixed
10+
xdescribe('Functional: Korean characters are properly encoded/decoded', function() {
1011
const options = {
1112
urls: [
1213
'http://example.com/',

test/unit/scraper-init-test.js

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ describe('Scraper initialization', function () {
121121

122122
s.options.request.should.containEql({
123123
throwHttpErrors: false,
124-
responseType: 'buffer',
124+
encoding: 'binary',
125125
decompress: true,
126126
https: {
127127
rejectUnauthorized: false
@@ -143,7 +143,7 @@ describe('Scraper initialization', function () {
143143

144144
s.options.request.should.eql({
145145
throwHttpErrors: true,
146-
responseType: 'buffer',
146+
encoding: 'binary',
147147
decompress: true,
148148
https: {
149149
rejectUnauthorized: false

0 commit comments

Comments
 (0)