Skip to content

Commit c565e14

Browse files
authored
Encode html entities for attribute updates (#406)
1 parent 9c9985b commit c565e14

File tree

6 files changed

+43
-4
lines changed

6 files changed

+43
-4
lines changed

lib/resource-handler/html/html-source-element.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ class HtmlSourceElement {
3838
* @param {string} newData
3939
*/
4040
setData (newData) {
41-
this.rule.attr ? this.el.attr(this.rule.attr, newData) : this.el.text(newData);
41+
// todo: encode can be removed after https://github.com/cheeriojs/cheerio/issues/957 fixed
42+
const escapedData = utils.encodeHtmlEntities(newData);
43+
this.rule.attr ? this.el.attr(this.rule.attr, escapedData) : this.el.text(newData);
4244
}
4345

4446
removeIntegrityCheck () {

lib/utils/index.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,10 @@ function decodeHtmlEntities (text) {
146146
return typeof text === 'string' ? htmlEntities.decode(text) : '';
147147
}
148148

149+
function encodeHtmlEntities (text) {
150+
return typeof text === 'string' ? htmlEntities.escape(text) : '';
151+
}
152+
149153
function clone (obj) {
150154
return Object.assign({}, obj);
151155
}
@@ -188,6 +192,7 @@ module.exports = {
188192
getTypeByMime,
189193
getTypeByFilename,
190194
decodeHtmlEntities,
195+
encodeHtmlEntities,
191196
clone,
192197
extend,
193198
union,

test/functional/html-entities-in-url/html-entities-in-url.test.js renamed to test/functional/html-entities/html-entities.test.js

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ var scrape = require('../../../index');
66
var testDirname = __dirname + '/.tmp';
77
var mockDirname = __dirname + '/mocks';
88

9-
describe('Functional: html entities in url', function() {
9+
describe('Functional: html entities', function() {
1010

1111
beforeEach(function() {
1212
nock.cleanAll();
@@ -27,7 +27,9 @@ describe('Functional: html entities in url', function() {
2727
// /fonts?family=Myriad&amp;v=2 => /fonts?family=Myriad&v=2
2828
nock('http://example.com/').get('/fonts?family=Myriad&v=2').reply(200, 'fonts.css', {'content-type': 'text/css'});
2929
// /?a=1&amp;style-attr.png => /?a=1&style-attr.png
30-
nock('http://example.com/').get('/style-attr.png?a=1&style-attr.png').reply(200, 'style-attr.png', {'content-type': 'text/css'});
30+
nock('http://example.com/').get('/style-attr.png?a=1&style-attr.png').reply(200, 'style-attr.png');
31+
// &quot;style-attr2.png&quot; => style-attr2.png
32+
nock('http://example.com/').get('/style-attr2.png').reply(200, 'style-attr2.png');
3133
// /?a=1&amp;b=2 => /?a=1&b=2
3234
nock('http://example.com/').get('/img.png?a=1&b=2').reply(200, 'img.png');
3335
// /test?b=2&amp;c=3&amp;d=4 => /test?b=2&c=3&d=4
@@ -56,10 +58,16 @@ describe('Functional: html entities in url', function() {
5658
fs.existsSync(testDirname + '/local/fonts.css').should.be.eql(true);
5759
should(fs.readFileSync(testDirname + '/local/fonts.css').toString()).be.eql('fonts.css');
5860

59-
should(indexHtml).containEql('background: url(\'local/style-attr.png\')');
61+
// single quote (') replaced with &#x27; in attribute
62+
should(indexHtml).containEql('background: url(&#x27;local/style-attr.png&#x27;)');
6063
fs.existsSync(testDirname + '/local/style-attr.png').should.be.eql(true);
6164
should(fs.readFileSync(testDirname + '/local/style-attr.png').toString()).be.eql('style-attr.png');
6265

66+
// double quote (") replaced with &quot; in attribute
67+
should(indexHtml).containEql('background: url(&quot;local/style-attr2.png&quot;)');
68+
fs.existsSync(testDirname + '/local/style-attr2.png').should.be.eql(true);
69+
should(fs.readFileSync(testDirname + '/local/style-attr2.png').toString()).be.eql('style-attr2.png');
70+
6371
should(indexHtml).containEql('img src="local/img.png');
6472
fs.existsSync(testDirname + '/local/img.png').should.be.eql(true);
6573
should(fs.readFileSync(testDirname + '/local/img.png').toString()).be.eql('img.png');

test/functional/html-entities-in-url/mocks/index.html renamed to test/functional/html-entities/mocks/index.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
</head>
88
<body>
99
<div style="background: url('http://example.com/style-attr.png?a=1&amp;style-attr.png')"></div>
10+
<div style="background: url(&quot;http://example.com/style-attr2.png&quot;)"></div>
1011
<img src="http://example.com/img.png?a=1&amp;b=2" />
1112
<a href="?b=2&amp;c=3&amp;d=4">test</a>
1213
</body>

test/unit/resource-handler/html.test.js

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,4 +267,27 @@ describe('ResourceHandler: Html', () => {
267267
resource.getText().should.not.containEql('integrity="sha256-X+Q/xqnlEgxCczSjjpp2AUGGgqM5gcBzhRQ0p+EAUEk="');
268268
});
269269
});
270+
271+
it('should use html entities for updated attributes', () => {
272+
const sources = [
273+
{ selector: '[style]', attr: 'style' },
274+
];
275+
downloadChildrenPaths.onFirstCall().resolves('width: 300px; height: 300px; background-image:url("./images/cat.jpg")');
276+
htmlHandler = new HtmlHandler({sources}, {downloadChildrenPaths});
277+
278+
const html = `
279+
<html>
280+
<body>
281+
<div style="width: 300px; height: 300px; background-image:url(&quot;http://example.com/cat.jpg&quot;)"></div>
282+
</body>
283+
</html>
284+
`;
285+
286+
const resource = new Resource('http://example.com', 'index.html');
287+
resource.setText(html);
288+
289+
return htmlHandler.handle(resource).then(() => {
290+
resource.getText().should.containEql('style="width: 300px; height: 300px; background-image:url(&quot;./images/cat.jpg&quot;)"');
291+
});
292+
});
270293
});

0 commit comments

Comments
 (0)