Skip to content

Commit f8b8d35

Browse files
authored
Switch to use filenamify instead of abandoned sanitize-filename package (#606)
* Switch to use filenamify instead of abandoned sanitize-filename package * Add debug logging for sanitization
1 parent 18014be commit f8b8d35

File tree

7 files changed

+90
-96
lines changed

7 files changed

+90
-96
lines changed

lib/filename-generator/by-site-structure.js

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import path from 'path';
22
import url from 'url';
3-
import sanitizeFilename from 'sanitize-filename';
4-
import {getHostFromUrl, getFilepathFromUrl, getFilenameExtension, shortenFilename} from '../utils/index.js';
3+
import {getHostFromUrl, getFilepathFromUrl, getFilenameExtension, sanitizeFilename} from '../utils/index.js';
54
import resourceTypes from '../config/resource-types.js';
65
import resourceTypeExtensions from '../config/resource-ext-by-type.js';
76

@@ -49,7 +48,6 @@ export default function generateFilename (resource, {defaultFilename}) {
4948

5049
function sanitizeFilepath (filePath) {
5150
filePath = path.normalize(filePath);
52-
const pathParts = filePath.split(path.sep).map(pathPart => sanitizeFilename(pathPart, {replacement: '_'}));
53-
pathParts[pathParts.length - 1] = shortenFilename(pathParts[pathParts.length - 1]);
51+
const pathParts = filePath.split(path.sep).map(pathPart => sanitizeFilename(pathPart));
5452
return pathParts.join(path.sep);
5553
}

lib/filename-generator/by-type.js

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,26 @@
11
import path from 'path';
2-
import sanitizeFilename from 'sanitize-filename';
3-
import {shortenFilename, getFilenameExtension, getFilenameFromUrl} from '../utils/index.js';
2+
import {sanitizeFilename, getFilenameExtension, getFilenameFromUrl} from '../utils/index.js';
43
import typeExtensions from '../config/resource-ext-by-type.js';
54

65
export default function generateFilename (resource, {subdirectories, defaultFilename}, occupiedFileNames) {
76
const occupiedNames = getSubDirectoryNames({subdirectories}).concat(occupiedFileNames);
87

98
let filename = getFilenameForResource(resource, {subdirectories, defaultFilename});
10-
filename = shortenFilename(sanitizeFilename(filename, {replacement: '_'}));
9+
filename = sanitizeFilename(filename);
1110

1211
const extension = getFilenameExtension(filename);
1312
const directory = getDirectoryByExtension(extension, {subdirectories, defaultFilename});
1413

15-
let currentFilename = path.join(directory, filename);
14+
let currentFilepath = path.join(directory, filename);
1615
const basename = path.basename(filename, extension);
1716
let index = 1;
1817

19-
while (occupiedNames.includes(currentFilename)) {
20-
currentFilename = path.join(directory, `${basename}_${index}${extension}`);
18+
while (occupiedNames.includes(currentFilepath)) {
19+
currentFilepath = path.join(directory, `${basename}_${index}${extension}`);
2120
index++;
2221
}
2322

24-
return currentFilename;
23+
return currentFilepath;
2524
}
2625

2726
function getFilenameForResource (resource, {defaultFilename}) {

lib/utils/index.js

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
import url from 'url';
22
import path from 'path';
33
import normalize from 'normalize-url';
4+
import { default as filenamifySanitizeFilename } from 'filenamify';
45
import typeByMime from '../config/resource-type-by-mime.js';
56
import typeByExt from '../config/resource-type-by-ext.js';
67

78
import logger from '../logger.js';
89

9-
const MAX_FILENAME_LENGTH = 255;
10+
11+
const MAX_FILENAME_LENGTH = 230;
1012
const IS_URL = /^((http[s]?:)?\/\/)/;
1113

1214
function isUrl (path) {
@@ -97,13 +99,12 @@ function getFilenameExtension (filepath) {
9799
return (typeof filepath === 'string') ? path.extname(filepath).toLowerCase() : null;
98100
}
99101

100-
function shortenFilename (filename) {
101-
if (filename.length >= MAX_FILENAME_LENGTH) {
102-
const shortFilename = filename.substring(0, 20) + getFilenameExtension(filename);
103-
logger.debug(`[utils] shorten filename: ${filename} -> ${shortFilename}`);
104-
return shortFilename;
102+
function sanitizeFilename (filename) {
103+
const sanitized = filenamifySanitizeFilename(filename, {replacement: '_', maxLength: MAX_FILENAME_LENGTH});
104+
if (sanitized !== filename) {
105+
logger.debug(`[utils] sanitize filename: ${filename} -> ${sanitized}`);
105106
}
106-
return filename;
107+
return sanitized;
107108
}
108109

109110
function normalizeUrl (u, opts) {
@@ -183,7 +184,7 @@ function updateResourceEncoding (resource, encoding) {
183184
const updatedText = Buffer.from(resourceText, resource.getEncoding()).toString(encoding);
184185
resource.setText(updatedText);
185186
}
186-
187+
187188
resource.setEncoding(encoding);
188189
}
189190

@@ -197,7 +198,7 @@ export {
197198
getFilenameExtension,
198199
getHashFromUrl,
199200
getHostFromUrl,
200-
shortenFilename,
201+
sanitizeFilename,
201202
prettifyFilename,
202203
normalizeUrl,
203204
urlsEqual,

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@
4040
"cheerio": "^1.1.2",
4141
"css-url-parser": "^1.0.0",
4242
"debug": "^4.3.1",
43+
"filenamify": "^7.0.0",
4344
"fs-extra": "^11.1.0",
4445
"got": "^14.4.7",
4546
"normalize-url": "^8.0.0",
4647
"p-queue": "^9.0.0",
47-
"sanitize-filename": "^1.6.3",
4848
"srcset": "^5.0.0"
4949
},
5050
"devDependencies": {

test/unit/filename-generator/by-site-structure-test.js

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ describe('FilenameGenerator: bySiteStructure', () => {
3434

3535
it('should replace not allowed characters from filename', () => {
3636
const r1 = new Resource('http://example.com/some/path/<*a*>.png');
37-
bySiteStructureFilenameGenerator(r1, options).should.equalFileSystemPath('example.com/some/path/__a__.png');
37+
bySiteStructureFilenameGenerator(r1, options).should.equalFileSystemPath('example.com/some/path/_a_.png');
3838
});
3939

4040
it('should replace not allowed characters from path', () => {
@@ -74,8 +74,10 @@ describe('FilenameGenerator: bySiteStructure', () => {
7474
it('should shorten filename', () => {
7575
const resourceFilename = new Array(1000).fill('a').join('') + '.png';
7676
const r = new Resource('http://example.com/' + resourceFilename);
77-
const filename = bySiteStructureFilenameGenerator(r, options);
78-
should(filename.length).be.lessThan(255);
77+
const filepath = bySiteStructureFilenameGenerator(r, options);
78+
const filenameParts = filepath.split('/');
79+
const filename = filenameParts[filenameParts.length - 1];
80+
should(filename.length).be.lessThanOrEqual(255);
7981
});
8082

8183
it('should shorten filename if resource is html without ext and default name is too long', () => {
@@ -85,17 +87,17 @@ describe('FilenameGenerator: bySiteStructure', () => {
8587
const filepath = bySiteStructureFilenameGenerator(r, { defaultFilename: defaultFilename });
8688
const filenameParts = filepath.split('/');
8789
const filename = filenameParts[filenameParts.length - 1];
88-
should(filename.length).be.lessThan(255);
90+
should(filename.length).be.lessThanOrEqual(255);
8991
});
9092

9193
it('should return decoded filepath', () => {
9294
const r = new Resource('https://developer.mozilla.org/ru/docs/JavaScript_%D1%88%D0%B5%D0%BB%D0%BB%D1%8B');
93-
const filename = bySiteStructureFilenameGenerator(r, options);
94-
filename.should.equalFileSystemPath('developer.mozilla.org/ru/docs/JavaScript_шеллы');
95+
const filepath = bySiteStructureFilenameGenerator(r, options);
96+
filepath.should.equalFileSystemPath('developer.mozilla.org/ru/docs/JavaScript_шеллы');
9597

9698
const r2 = new Resource('https://developer.mozilla.org/Hello%20G%C3%BCnter.png');
97-
const filename2 = bySiteStructureFilenameGenerator(r2, options);
98-
filename2.should.equalFileSystemPath('developer.mozilla.org/Hello Günter.png');
99+
const filepath2 = bySiteStructureFilenameGenerator(r2, options);
100+
filepath2.should.equalFileSystemPath('developer.mozilla.org/Hello Günter.png');
99101
});
100102

101103
it('should keep query strings', () => {

test/unit/filename-generator/by-type-test.js

Lines changed: 50 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,27 @@ import byTypeFilenameGenerator from '../../../lib/filename-generator/by-type.js'
88
describe('FilenameGenerator: byType', () => {
99
it('should return resource filename', () => {
1010
const r = new Resource('http://example.com/a.png', 'b.png');
11-
const filename = byTypeFilenameGenerator(r, {}, []);
12-
filename.should.equalFileSystemPath('b.png');
11+
const filepath = byTypeFilenameGenerator(r, {}, []);
12+
filepath.should.equalFileSystemPath('b.png');
1313
});
1414

1515
it('should return url-based filename if resource has no filename', () => {
1616
const r = new Resource('http://example.com/a.png');
17-
const filename = byTypeFilenameGenerator(r, {}, []);
18-
filename.should.equalFileSystemPath('a.png');
17+
const filepath = byTypeFilenameGenerator(r, {}, []);
18+
filepath.should.equalFileSystemPath('a.png');
1919
});
2020

2121
it('should return url-based filename if resource has empty filename', () => {
2222
const r = new Resource('http://example.com/a.png', '');
23-
const filename = byTypeFilenameGenerator(r, {}, []);
24-
filename.should.equalFileSystemPath('a.png');
23+
const filepath = byTypeFilenameGenerator(r, {}, []);
24+
filepath.should.equalFileSystemPath('a.png');
2525
});
2626

2727
it('should add missed extensions for html resources', () => {
2828
const r = new Resource('http://example.com/about', '');
2929
r.getType = sinon.stub().returns('html');
30-
const filename = byTypeFilenameGenerator(r, {}, []);
31-
filename.should.equalFileSystemPath('about.html');
30+
const filepath = byTypeFilenameGenerator(r, {}, []);
31+
filepath.should.equalFileSystemPath('about.html');
3232
});
3333

3434
it('should add missed extensions for css resources', () => {
@@ -41,15 +41,15 @@ describe('FilenameGenerator: byType', () => {
4141
it('should add missed extensions for js resources', () => {
4242
const r = new Resource('http://example.com/js', '');
4343
r.getType = sinon.stub().returns('js');
44-
const filename = byTypeFilenameGenerator(r, {}, []);
45-
filename.should.equalFileSystemPath('js.js');
44+
const filepath = byTypeFilenameGenerator(r, {}, []);
45+
filepath.should.equalFileSystemPath('js.js');
4646
});
4747

4848
it('should not add missed extensions for other resources', () => {
4949
const r = new Resource('http://1.gravatar.com/avatar/4d63e4a045c7ff22accc33dc08442f86?s=140&amp;d=%2Fwp-content%2Fuploads%2F2015%2F05%2FGood-JOb-150x150.jpg&amp;r=g', '');
5050
r.getType = sinon.stub().returns('home');
51-
const filename = byTypeFilenameGenerator(r, {}, []);
52-
filename.should.equalFileSystemPath('4d63e4a045c7ff22accc33dc08442f86');
51+
const filepath = byTypeFilenameGenerator(r, {}, []);
52+
filepath.should.equalFileSystemPath('4d63e4a045c7ff22accc33dc08442f86');
5353
});
5454

5555
it('should return filename with correct subdirectory', () => {
@@ -60,8 +60,8 @@ describe('FilenameGenerator: byType', () => {
6060
};
6161

6262
const r = new Resource('http://example.com/a.png');
63-
const filename = byTypeFilenameGenerator(r, options, []);
64-
filename.should.equalFileSystemPath('img/a.png');
63+
const filepath = byTypeFilenameGenerator(r, options, []);
64+
filepath.should.equalFileSystemPath('img/a.png');
6565
});
6666

6767
it('should return filename with correct subdirectory when string cases are different', () => {
@@ -72,14 +72,14 @@ describe('FilenameGenerator: byType', () => {
7272
};
7373

7474
const r = new Resource('http://example.com/a.PNG');
75-
const f = byTypeFilenameGenerator(r, options, []);
76-
f.should.equalFileSystemPath('img/a.PNG');
75+
const filepath = byTypeFilenameGenerator(r, options, []);
76+
filepath.should.equalFileSystemPath('img/a.PNG');
7777
});
7878

7979
it('should return different filename if desired filename is occupied', () => {
8080
const r = new Resource('http://second-example.com/a.png');
81-
const filename = byTypeFilenameGenerator(r, {}, [ 'a.png' ]);
82-
filename.should.not.equalFileSystemPath('a.png');
81+
const filepath = byTypeFilenameGenerator(r, {}, [ 'a.png' ]);
82+
filepath.should.not.equalFileSystemPath('a.png');
8383
});
8484

8585
it('should return different filename if desired filename is occupied N times', () => {
@@ -89,30 +89,32 @@ describe('FilenameGenerator: byType', () => {
8989
const r3 = new Resource('http://third-example.com/a.png');
9090
const r4 = new Resource('http://fourth-example.com/a.png');
9191

92-
const f1 = byTypeFilenameGenerator(r1, {}, occupiedFilenames);
93-
f1.should.equalFileSystemPath('a.png');
94-
occupiedFilenames.push(f1);
92+
const fp1 = byTypeFilenameGenerator(r1, {}, occupiedFilenames);
93+
fp1.should.equalFileSystemPath('a.png');
94+
occupiedFilenames.push(fp1);
9595

96-
const f2 = byTypeFilenameGenerator(r2, {}, occupiedFilenames);
97-
f2.should.not.equal(f1);
98-
occupiedFilenames.push(f2);
96+
const fp2 = byTypeFilenameGenerator(r2, {}, occupiedFilenames);
97+
fp2.should.not.equal(fp1);
98+
occupiedFilenames.push(fp2);
9999

100-
const f3 = byTypeFilenameGenerator(r3, {}, occupiedFilenames);
101-
f3.should.not.equal(f1);
102-
f3.should.not.equal(f2);
103-
occupiedFilenames.push(f3);
100+
const fp3 = byTypeFilenameGenerator(r3, {}, occupiedFilenames);
101+
fp3.should.not.equal(fp1);
102+
fp3.should.not.equal(fp2);
103+
occupiedFilenames.push(fp3);
104104

105-
const f4 = byTypeFilenameGenerator(r4, {}, occupiedFilenames);
106-
f4.should.not.equal(f1);
107-
f4.should.not.equal(f2);
108-
f4.should.not.equal(f3);
105+
const fp4 = byTypeFilenameGenerator(r4, {}, occupiedFilenames);
106+
fp4.should.not.equal(fp1);
107+
fp4.should.not.equal(fp2);
108+
fp4.should.not.equal(fp3);
109109
});
110110

111111
it('should shorten filename', () => {
112112
const resourceFilename = new Array(1000).fill('a').join('') + '.png';
113113
const r = new Resource('http://example.com/a.png', resourceFilename);
114-
const filename = byTypeFilenameGenerator(r, {}, []);
115-
should(filename.length).be.lessThan(255);
114+
const filepath = byTypeFilenameGenerator(r, {}, []);
115+
const filenameParts = filepath.split('/');
116+
const filename = filenameParts[filenameParts.length - 1];
117+
should(filename.length).be.lessThanOrEqual(255);
116118
});
117119

118120
it('should return different short filename if first short filename is occupied', () => {
@@ -121,28 +123,30 @@ describe('FilenameGenerator: byType', () => {
121123
const r1 = new Resource('http://first-example.com/a.png', resourceFilename);
122124
const r2 = new Resource('http://second-example.com/a.png', resourceFilename);
123125

124-
const f1 = byTypeFilenameGenerator(r1, {}, []);
125-
should(f1.length).be.lessThan(255);
126+
const fp1 = byTypeFilenameGenerator(r1, {}, []);
127+
const filenameParts1 = fp1.split('/');
128+
const filename1 = filenameParts1[filenameParts1.length - 1];
129+
should(filename1.length).be.lessThanOrEqual(255);
126130

127-
const f2 = byTypeFilenameGenerator(r2, {}, [ f1 ]);
128-
should(f2.length).be.lessThan(255);
129-
should(f2).not.be.eql(f1);
130-
131-
should(f2).not.be.eql(f1);
131+
const fp2 = byTypeFilenameGenerator(r2, {}, [ fp1 ]);
132+
const filenameParts2 = fp2.split('/');
133+
const filename2 = filenameParts2[filenameParts2.length - 1];
134+
should(filename2.length).be.lessThanOrEqual(255);
135+
should(filename2).not.be.eql(filename1);
132136
});
133137

134138
it('should return decoded url-based filename', () => {
135139
const r = new Resource('https://developer.mozilla.org/ru/docs/JavaScript_%D1%88%D0%B5%D0%BB%D0%BB%D1%8B');
136-
const filename = byTypeFilenameGenerator(r, {}, []);
137-
filename.should.equalFileSystemPath('JavaScript_шеллы');
140+
const filepath = byTypeFilenameGenerator(r, {}, []);
141+
filepath.should.equalFileSystemPath('JavaScript_шеллы');
138142

139143
const r2 = new Resource('https://developer.mozilla.org/Hello%20G%C3%BCnter.png');
140-
const filename2 = byTypeFilenameGenerator(r2, {}, []);
141-
filename2.should.equalFileSystemPath('Hello Günter.png');
144+
const filepath2 = byTypeFilenameGenerator(r2, {}, []);
145+
filepath2.should.equalFileSystemPath('Hello Günter.png');
142146
});
143147

144148
it('should remove not allowed characters from filename', () => {
145149
const r1 = new Resource('http://example.com/some/path/<*a*>.png');
146-
byTypeFilenameGenerator(r1, {}, []).should.equalFileSystemPath('__a__.png');
150+
byTypeFilenameGenerator(r1, {}, []).should.equalFileSystemPath('_a_.png');
147151
});
148152
});

0 commit comments

Comments
 (0)