Skip to content

Commit 60a59f5

Browse files
authored
Fix url comparison in requestResource (#117)
1 parent 9b7f578 commit 60a59f5

File tree

2 files changed

+14
-11
lines changed

2 files changed

+14
-11
lines changed

lib/scraper.js

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
var Promise = require('bluebird');
22
var _ = require('lodash');
3-
var normalizeUrl = require('normalize-url');
43

54
var logger = require('./logger');
65

@@ -12,6 +11,7 @@ var getFilenameGenerator = require('./filename-generators');
1211
var makeRequest = require('./request');
1312
var getResourceHandler = require('./file-handlers');
1413
var FSAdapter = require('./fs-adaper');
14+
var utils = require('./utils');
1515

1616
function Scraper (options) {
1717
var self = this;
@@ -52,23 +52,19 @@ Scraper.prototype.getOccupiedFileNames = function getOccupiedFileNames () {
5252
};
5353

5454
Scraper.prototype.addRespondedResourcePromise = function addRespondedResourcePromise (url, promise) {
55-
url = normalizeUrl(url);
56-
this.respondedResourcePromises[url] = promise;
55+
this.respondedResourcePromises[utils.normalizeUrl(url)] = promise;
5756
};
5857

5958
Scraper.prototype.getRespondedResourcePromise = function getRespondedResourcePromise (url) {
60-
url = normalizeUrl(url);
61-
return this.respondedResourcePromises[url];
59+
return this.respondedResourcePromises[utils.normalizeUrl(url)];
6260
};
6361

6462
Scraper.prototype.addLoadedResourcePromise = function addLoadedResourcePromise (url, promise) {
65-
url = normalizeUrl(url);
66-
this.loadedResourcePromises[url] = promise;
63+
this.loadedResourcePromises[utils.normalizeUrl(url)] = promise;
6764
};
6865

6966
Scraper.prototype.getLoadedResourcePromise = function getLoadedResourcePromise (url) {
70-
url = normalizeUrl(url);
71-
return this.loadedResourcePromises[url];
67+
return this.loadedResourcePromises[utils.normalizeUrl(url)];
7268
};
7369

7470
Scraper.prototype.getHtmlSources = function getHtmlSources () {
@@ -123,7 +119,7 @@ Scraper.prototype.requestResource = function requestResource (resource) {
123119
}).then(function requestCompleted (responseData) {
124120
logger.debug('received response for ' + url);
125121

126-
if (responseData.url !== url) { // Url may be changed in redirects
122+
if (!utils.urlsEqual(responseData.url, url)) { // Url may be changed in redirects
127123
logger.debug('url changed. old url = ' + url + ', new ulr = ' + responseData.url);
128124
resource.setUrl(responseData.url);
129125
self.addRespondedResourcePromise(responseData.url, respondedResourcePromise);

lib/utils.js

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
var url = require('url');
22
var path = require('path');
33
var Promise = require('bluebird');
4+
var normalizeUrl = require('normalize-url');
45

56
var logger = require('./logger');
67

@@ -74,6 +75,10 @@ function waitAllFulfilled (promises) {
7475
}));
7576
}
7677

78+
function urlsEqual (url1, url2) {
79+
return normalizeUrl(url1) === normalizeUrl(url2);
80+
}
81+
7782
module.exports = {
7883
isUrl: isUrl,
7984
getUrl: getUrl,
@@ -84,5 +89,7 @@ module.exports = {
8489
getFilenameExtension: getFilenameExtension,
8590
getHashFromUrl: getHashFromUrl,
8691
shortenFilename: shortenFilename,
87-
waitAllFulfilled: waitAllFulfilled
92+
waitAllFulfilled: waitAllFulfilled,
93+
normalizeUrl: normalizeUrl,
94+
urlsEqual: urlsEqual
8895
};

0 commit comments

Comments
 (0)