Skip to content

Commit 7531dc2

Browse files
ikeyans0ph1e
authored and committed
encode relative path when update url for downloaded resource (#294)
1 parent c2e0037 commit 7531dc2

File tree

4 files changed

+29
-8
lines changed

4 files changed

+29
-8
lines changed

lib/resource-handler/index.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,10 @@ class ResourceHandler {
6161

6262
let relativePath = utils.getRelativePath(parentResource.getFilename(), respondedResource.getFilename());
6363
if (self.options.prettifyUrls) {
64-
relativePath = relativePath.replace(self.options.defaultFilename, '');
64+
if (relativePath === self.options.defaultFilename
65+
|| relativePath.endsWith('/' + self.options.defaultFilename)) {
66+
relativePath = relativePath.slice(0, -self.options.defaultFilename.length);
67+
}
6568
}
6669
const hash = utils.getHashFromUrl(childPath);
6770

lib/utils/index.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,11 @@ function getUnixPath (filepath) {
3535
function getRelativePath (path1, path2) {
3636
const dirname = path.dirname(path1);
3737
const relativePath = path.relative(dirname, path2);
38-
return getUnixPath(relativePath);
38+
const escaped = relativePath
39+
.split(path.sep)
40+
.map(pathComponent => encodeURIComponent(pathComponent).replace(/['()]/g, c => '%' + c.charCodeAt(0).toString(16)))
41+
.join(path.sep);
42+
return getUnixPath(escaped);
3943
}
4044

4145
/**

test/unit/resource-handler/index.test.js

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ const should = require('should');
44
const sinon = require('sinon');
55
const Promise = require('bluebird');
66
const proxyquire = require('proxyquire');
7+
const path = require('path');
78
const Resource = require('../../../lib/resource');
89
const ResourceHandler = require('../../../lib/resource-handler');
910

@@ -182,19 +183,21 @@ describe('ResourceHandler', function() {
182183
pathContainer.getPaths.returns([
183184
'http://first.com/img/a.jpg',
184185
'http://first.com/b.jpg',
185-
'http://second.com/img/c.jpg'
186+
'http://second.com/img/c.jpg',
187+
'http://second.com/d',
186188
]);
187189

188-
scraperContext.requestResource.onFirstCall().returns(Promise.resolve(new Resource('http://first.com/img/a.jpg', 'local/a.jpg')));
189-
scraperContext.requestResource.onSecondCall().returns(Promise.resolve(new Resource('http://first.com/b.jpg', 'local/b.jpg')));
190-
scraperContext.requestResource.onThirdCall().returns(Promise.resolve(new Resource('http://second.com/img/c.jpg', 'local/c.jpg')));
190+
scraperContext.requestResource.onCall(0).returns(Promise.resolve(new Resource('http://first.com/img/a.jpg', 'local' + path.sep + 'a.jpg')));
191+
scraperContext.requestResource.onCall(1).returns(Promise.resolve(new Resource('http://first.com/b.jpg', 'local' + path.sep + 'b.jpg')));
192+
scraperContext.requestResource.onCall(2).returns(Promise.resolve(new Resource('http://second.com/img/c.jpg', 'local' + path.sep + 'c.jpg')));
193+
scraperContext.requestResource.onCall(3).returns(Promise.resolve(new Resource('http://second.com/d', 'a%b' + path.sep + '"\'( )?p=q&\\#')));
191194

192195
var updateChildSpy = sinon.spy(parentResource, 'updateChild');
193196

194197
return resHandler.downloadChildrenResources(pathContainer, parentResource).then(function () {
195198
var updateTextStub = pathContainer.updateText;
196199
updateTextStub.calledOnce.should.be.eql(true);
197-
updateTextStub.args[0][0].length.should.be.eql(3);
200+
updateTextStub.args[0][0].length.should.be.eql(4);
198201
updateTextStub.args[0][0].should.containEql({
199202
oldPath: 'http://first.com/img/a.jpg',
200203
newPath: 'local/a.jpg'
@@ -207,7 +210,11 @@ describe('ResourceHandler', function() {
207210
oldPath: 'http://second.com/img/c.jpg',
208211
newPath: 'local/c.jpg'
209212
});
210-
updateChildSpy.calledThrice.should.be.eql(true);
213+
updateTextStub.args[0][0].should.containEql({
214+
oldPath: 'http://second.com/d',
215+
newPath: 'a%25b/%22%27%28%20%29%3Fp%3Dq%26' + (path.sep === '\\' ? '/' : '%5C') + '%23'
216+
});
217+
updateChildSpy.callCount.should.be.eql(4);
211218
});
212219
});
213220

test/unit/utils/utils-test.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,13 @@ describe('Utils', function () {
117117
utils.getRelativePath('index.html', 'img/1.png').should.be.equal('img/1.png');
118118
utils.getRelativePath('css/1.css', 'css/2.css').should.be.equal('2.css');
119119
});
120+
it('should escape path components with encodeURIComponent', function () {
121+
utils.getRelativePath('index.html', 'a/css?family=Open+Sans:300,400,600,700&lang=en').should.be.equal('a/css%3Ffamily%3DOpen%2BSans%3A300%2C400%2C600%2C700%26lang%3Den');
122+
});
123+
it('should also escape [\'()]', function () {
124+
utils.getRelativePath('index.html', '\'single quote for html attrs\'').should.be.equal('%27single%20quote%20for%20html%20attrs%27');
125+
utils.getRelativePath('index.html', '(parenthesizes for css url)').should.be.equal('%28parenthesizes%20for%20css%20url%29');
126+
});
120127
});
121128

122129
describe('#shortenFilename', function() {

0 commit comments

Comments
 (0)