Skip to content

Commit 448514f

Browse files
committed
Handle hash in urls for html resources
1 parent f1983f7 commit 448514f

File tree

4 files changed

+81
-1
lines changed

4 files changed

+81
-1
lines changed

lib/file-handlers/html.js

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
var cheerio = require('cheerio');
22
var Promise = require('bluebird');
33
var utils = require('../utils');
4+
var types = require('../config/resource-types');
45

56
function loadHtml (context, resource) {
67
var sources = context.getHtmlSources();
@@ -54,6 +55,12 @@ function loadResources (context, resource, source) {
5455

5556
return context.loadResource(htmlResource).then(function handleLoadedSource (loadedResource) {
5657
var relativePath = utils.getRelativePath(filename, loadedResource.getFilename());
58+
var hash = utils.getHashFromUrl(attr);
59+
60+
if (hash && loadedResource.getType() === types.html) {
61+
relativePath = relativePath.concat(hash);
62+
}
63+
5764
el.attr(source.attr, relativePath);
5865
return Promise.resolve();
5966
});

lib/utils.js

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,10 @@ function getFilenameFromUrl (u) {
3030
return path.basename(url.parse(u).pathname);
3131
}
3232

33+
/**
 * Extract the fragment part of a URL.
 *
 * @param {string} u - absolute or relative URL
 * @returns {string} the fragment including its leading '#', or an empty
 *   string when the URL has no fragment
 */
function getHashFromUrl (u) {
	var fragment = url.parse(u).hash;

	// url.parse reports a missing fragment as a falsy value; normalise it to ''
	// so callers can always treat the result as a string.
	return fragment ? fragment : '';
}
36+
3337
function waitAllFulfilled(promises) {
3438
return Promise.all(promises.map(function(promise) {
3539
return promise.reflect();
@@ -42,5 +46,6 @@ module.exports = {
4246
getUnixPath: getUnixPath,
4347
getRelativePath: getRelativePath,
4448
getFilenameFromUrl: getFilenameFromUrl,
49+
getHashFromUrl: getHashFromUrl,
4550
waitAllFulfilled: waitAllFulfilled
4651
};

test/unit/file-handlers/html-test.js

Lines changed: 54 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@ var defaultScraperOpts = {
1414
sources: [
1515
{ selector: 'img', attr: 'src' },
1616
{ selector: 'link[rel="stylesheet"]', attr: 'href' },
17-
{ selector: 'script', attr: 'src' }
17+
{ selector: 'script', attr: 'src'},
18+
{ selector: 'a', attr: 'href' }
1819
]
1920
};
2021
var scraper;
@@ -213,5 +214,57 @@ describe('Html handler', function () {
213214
done();
214215
}).catch(done);
215216
});
217+
218+
// Verifies that a link to another html resource keeps its '#fragment'
// after the href is rewritten to the local filename.
it('should keep hash in url for html resources', function () {
	// NOTE(review): scraper.loadResource is stubbed below, so this interceptor
	// may never be consumed - confirm it is still needed.
	nock('http://example.com').get('/page1.html').reply(200, 'OK');

	// Loaded resource reports type 'html', so the handler should re-append the hash.
	var resourceStub = new Resource('http://example.com/page1.html', 'local/page1.html');
	sinon.stub(resourceStub, 'getType').returns('html');
	sinon.stub(scraper, 'loadResource').returns(Promise.resolve(resourceStub));

	var html = '\
<html> \
<body> \
<a href="http://example.com/page1.html#hash">link</a> \
</body> \
</html>\
';

	var po = new Resource('http://example.com', 'index.html');
	po.setText(html);

	// Return the promise only: combining a `done` callback with a returned
	// promise makes mocha (>= 3) fail the test with
	// "Resolution method is overspecified". A failed assertion rejects the
	// promise and is reported by mocha directly.
	return loadHtml(scraper, po).then(function () {
		var text = po.getText();
		text.should.containEql('local/page1.html#hash');
	});
});
242+
243+
// Verifies that the '#fragment' is dropped when the linked resource is not
// html: only the local filename must end up in the rewritten href.
it('should remove hash from url for not-html resources', function () {
	// NOTE(review): scraper.loadResource is stubbed below, so this interceptor
	// may never be consumed - confirm it is still needed.
	nock('http://example.com').get('/page1.html').reply(200, 'OK');

	// Loaded resource reports a non-html type, so the hash must not be re-appended.
	var resourceStub = new Resource('http://example.com/page1.html', 'local/page1.html');
	sinon.stub(resourceStub, 'getType').returns('other');
	sinon.stub(scraper, 'loadResource').returns(Promise.resolve(resourceStub));

	var html = '\
<html> \
<body> \
<a href="http://example.com/page1.html#hash">link</a> \
</body> \
</html>\
';

	var po = new Resource('http://example.com', 'index.html');
	po.setText(html);

	// Return the promise only: combining a `done` callback with a returned
	// promise makes mocha (>= 3) fail the test with
	// "Resolution method is overspecified". A failed assertion rejects the
	// promise and is reported by mocha directly.
	return loadHtml(scraper, po).then(function () {
		var text = po.getText();
		text.should.not.containEql('local/page1.html#hash');
		text.should.containEql('local/page1.html');
	});
});
268+
216269
});
217270
});

test/unit/utils-test.js

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,21 @@ describe('Common utils', function () {
5656
});
5757
});
5858

59+
// Unit tests for utils.getHashFromUrl: the fragment (with its leading '#')
// is returned when present, and an empty string otherwise.
describe('#getHashFromUrl', function () {
	it('should return hash from url', function () {
		// [input url, expected hash]
		var cases = [
			['#', '#'],
			['#hash', '#hash'],
			['page.html#hash', '#hash'],
			['http://example.com/page.html#hash', '#hash']
		];

		cases.forEach(function (pair) {
			utils.getHashFromUrl(pair[0]).should.be.equal(pair[1]);
		});
	});

	it('should return empty string if url doesn\'t contain hash', function () {
		var urlsWithoutHash = [
			'',
			'page.html?a=b',
			'http://example.com/page.html?a=b'
		];

		urlsWithoutHash.forEach(function (input) {
			utils.getHashFromUrl(input).should.be.equal('');
		});
	});
});
73+
5974
describe('#getRelativePath', function () {
6075
it('should return relative path', function () {
6176
utils.getRelativePath('css/1.css', 'img/1.png').should.be.equal('../img/1.png');

0 commit comments

Comments
 (0)