Skip to content

Commit db206b7

Browse files
authored
Scrape resources with query strings when using bySiteStructure (#414), fixes #411
1 parent 66f5113 commit db206b7

File tree

2 files changed

+38
-1
lines changed

2 files changed

+38
-1
lines changed

lib/filename-generator/by-site-structure.js

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,46 @@
11
const _ = require('lodash');
22
const path = require('path');
3+
const url = require('url');
34
const utils = require('../utils');
45
const resourceTypes = require('../config/resource-types');
56
const resourceTypeExtensions = require('../config/resource-ext-by-type');
67

78
module.exports = function generateFilename (resource, {defaultFilename}) {
89
const resourceUrl = resource.getUrl();
910
const host = utils.getHostFromUrl(resourceUrl);
11+
const urlParsed = url.parse(resourceUrl);
1012
let filePath = utils.getFilepathFromUrl(resourceUrl);
1113
const extension = utils.getFilenameExtension(filePath);
1214

1315
filePath = path.join(host.replace(':', '_'), filePath);
1416

17+
// If the URL has a query string
18+
if (urlParsed.query) {
19+
const parsed = path.parse(filePath);
20+
const basename = path.join(parsed.dir, parsed.name);
21+
// Use the query string as file name in the site structure directory
22+
if (!extension) {
23+
// Without extension: http://example.com/path?q=test => path/q=test
24+
filePath = `${basename}${path.sep}${urlParsed.query}`;
25+
} else {
26+
// With extension: http://example.com/path/picture.png?q=test => path/picture_q=test.png
27+
filePath = `${basename}_${urlParsed.query}${extension}`;
28+
}
29+
}
30+
1531
// If we have HTML from 'http://example.com/path' => set 'path/index.html' as filepath
1632
if (resource.isHtml()) {
1733
const htmlExtensions = resourceTypeExtensions[resourceTypes.html];
1834
const resourceHasHtmlExtension = _.includes(htmlExtensions, extension);
1935
// Append index.html only if the file path does not already have an HTML extension: '/path/test.com' => '/path/test.com/index.html'
2036
if (!resourceHasHtmlExtension) {
21-
filePath = path.join(filePath, defaultFilename);
37+
if (!urlParsed.query) {
38+
// Without query string: http://example.com/path => path/index.html
39+
filePath = path.join(filePath, defaultFilename);
40+
} else {
41+
// With query string: http://example.com/path?q=test => path/q=test.html
42+
filePath = `${filePath}.html`;
43+
}
2244
}
2345
}
2446

test/unit/filename-generator/by-site-structure-test.js

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,19 @@ describe('FilenameGenerator: bySiteStructure', function() {
8585
var filename2 = bySiteStructureFilenameGenerator(r2, options);
8686
filename2.should.equalFileSystemPath('developer.mozilla.org/Hello Günter.png');
8787
});
88+
89+
it('should keep query strings', function () {
90+
var isHtmlMock = sinon.stub().returns(true);
91+
92+
var r1 = new Resource('http://example.com/path?q=test');
93+
r1.isHtml = isHtmlMock;
94+
bySiteStructureFilenameGenerator(r1, options).should.equalFileSystemPath('example.com/path/q=test.html');
95+
96+
var r2 = new Resource('http://example.com/path?q1=test1&q2=test2');
97+
r2.isHtml = isHtmlMock;
98+
bySiteStructureFilenameGenerator(r2, options).should.equalFileSystemPath('example.com/path/q1=test1&q2=test2.html');
99+
100+
var r3 = new Resource('http://example.com/path/picture.png?q1=test1&q2=test2');
101+
bySiteStructureFilenameGenerator(r3, options).should.equalFileSystemPath('example.com/path/picture_q1=test1&q2=test2.png');
102+
})
88103
});

0 commit comments

Comments
 (0)