Skip to content

Commit 539b48d

Browse files
authored
Fix relative resources url in redirected page (#242), closes #239
1 parent 028e9cf commit 539b48d

File tree

5 files changed

+87
-8
lines changed

5 files changed

+87
-8
lines changed

lib/utils/index.js

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
const url = require('url');
44
const path = require('path');
55
const Promise = require('bluebird');
6-
const normalizeUrl = require('normalize-url');
6+
const normalize = require('normalize-url');
77
const htmlEntities = require('he');
88
const _ = require('lodash');
99
const typeByMime = require('../config/resource-type-by-mime');
@@ -66,7 +66,7 @@ function getFilenameFromUrl (u) {
6666
* @returns {string} path
6767
*/
6868
function getFilepathFromUrl (u) {
69-
var nu = normalizeUrl(u);
69+
const nu = normalizeUrl(u, {removeTrailingSlash: true});
7070
return getPathnameFromUrl(nu).substring(1);
7171
}
7272

@@ -109,6 +109,10 @@ function waitAllFulfilled (promises) {
109109
}));
110110
}
111111

112+
function normalizeUrl (u, opts) {
113+
return normalize(u, extend({removeTrailingSlash: false}, opts));
114+
}
115+
112116
function urlsEqual (url1, url2) {
113117
return normalizeUrl(url1) === normalizeUrl(url2);
114118
}
test/functional/redirect/mocks/relative-resources-about.html

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>About</title>
6+
<link rel="stylesheet" type="text/css" href="/style.css"/> <!-- /style.css -->
7+
<link rel="stylesheet" type="text/css" href="style.css"/> <!-- /about/style.css -->
8+
</head>
9+
<body>
10+
11+
</body>
12+
</html>
test/functional/redirect/mocks/relative-resources-index.html

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>Index</title>
6+
<link rel="stylesheet" type="text/css" href="style.css"/>
7+
</head>
8+
<body>
9+
<a href="/about">About</a>
10+
</body>
11+
</html>

test/functional/redirect/redirect.test.js

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
1+
'use strict';
2+
13
require('should');
2-
var nock = require('nock');
3-
var fs = require('fs-extra');
4-
var sinon = require('sinon');
5-
var Scraper = require('../../../lib/scraper');
4+
const nock = require('nock');
5+
const fs = require('fs-extra');
6+
const sinon = require('sinon');
7+
const Scraper = require('../../../lib/scraper');
8+
const scrape = require('../../../index');
69

7-
var testDirname = __dirname + '/.tmp';
8-
var mockDirname = __dirname + '/mocks';
10+
const testDirname = __dirname + '/.tmp';
11+
const mockDirname = __dirname + '/mocks';
912

1013
describe('Functional redirects', function() {
1114

@@ -60,4 +63,47 @@ describe('Functional redirects', function() {
6063
fs.readFileSync(testDirname + '/true-page.html').toString().should.be.eql('true page 1');
6164
});
6265
});
66+
67+
it('should correctly handle relative source in redirected page', () => {
68+
const options = {
69+
urls: [
70+
{ url: 'http://example.com', filename: 'index.html'}
71+
],
72+
directory: testDirname,
73+
subdirectories: [
74+
{ directory: 'css', extensions: ['.css'] }
75+
],
76+
maxRecursiveDepth: 1,
77+
sources: [
78+
{selector: 'link', attr: 'href'},
79+
{selector: 'a', attr: 'href'}
80+
]
81+
};
82+
83+
nock('http://example.com/').get('/').replyWithFile(200, mockDirname + '/relative-resources-index.html');
84+
nock('http://example.com/').get('/about').reply(301, '', {'Location': 'http://example.com/about/'});
85+
nock('http://example.com/').get('/about/').replyWithFile(200, mockDirname + '/relative-resources-about.html', {'content-type': 'text/html'});
86+
nock('http://example.com/').get('/style.css').reply(200, 'style.css');
87+
nock('http://example.com/').get('/about/style.css').reply(200, 'about/style.css');
88+
89+
return scrape(options).then(function() {
90+
fs.existsSync(testDirname + '/index.html').should.be.eql(true);
91+
fs.existsSync(testDirname + '/about.html').should.be.eql(true);
92+
fs.existsSync(testDirname + '/css/style.css').should.be.eql(true);
93+
fs.existsSync(testDirname + '/css/style_1.css').should.be.eql(true);
94+
95+
const style = fs.readFileSync(testDirname + '/css/style.css').toString();
96+
style.should.be.eql('style.css');
97+
98+
const style_1 = fs.readFileSync(testDirname + '/css/style_1.css').toString();
99+
style_1.should.be.eql('about/style.css');
100+
101+
const index = fs.readFileSync(testDirname + '/index.html').toString();
102+
index.should.containEql('<link rel="stylesheet" type="text/css" href="css/style.css">');
103+
104+
const about = fs.readFileSync(testDirname + '/about.html').toString();
105+
about.should.containEql('<link rel="stylesheet" type="text/css" href="css/style.css">');
106+
about.should.containEql('<link rel="stylesheet" type="text/css" href="css/style_1.css">');
107+
});
108+
});
63109
});

test/unit/utils/utils-test.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,4 +220,10 @@ describe('Utils', function () {
220220
should(utils.decodeHtmlEntities('?a=1&amp;v=2')).be.eql('?a=1&v=2');
221221
});
222222
});
223+
224+
describe('#urlsEqual', () => {
225+
it('should return false for /path and /path/', function() {
226+
should(utils.urlsEqual('http://example.com/path', 'http://example.com/path/')).be.eql(false);
227+
});
228+
})
223229
});

0 commit comments

Comments
 (0)