|
var should = require('should');
var nock = require('nock');
var fs = require('fs-extra');
var Scraper = require('../../../lib/scraper');

// Scratch directory the scraper writes into; removed after every test.
var testDirname = __dirname + '/.tmp';
// Directory holding the fixture files served by the mocked host.
var mockDirname = __dirname + '/mocks';

/**
 * Functional test: scrapes a fully mocked http://example.com/ and checks
 * which query-string URLs get requested and how the saved files reference
 * the downloaded resources.
 */
describe('Functional: html entities in url', function() {

  /**
   * Starts a GET interceptor for `path` on the mocked host.
   * A fresh nock scope is created on every call.
   */
  function mockGet(path) {
    return nock('http://example.com/').get(path);
  }

  /** Returns the text of a file saved under the scratch directory. */
  function readSaved(relativePath) {
    return fs.readFileSync(testDirname + relativePath).toString();
  }

  /** Asserts that a saved file exists and that its content equals `expectedBody`. */
  function expectSavedWithBody(relativePath, expectedBody) {
    fs.existsSync(testDirname + relativePath).should.be.eql(true);
    should(readSaved(relativePath)).be.eql(expectedBody);
  }

  // Start every test with no leftover interceptors and with real network
  // access disabled.
  beforeEach(function() {
    nock.cleanAll();
    nock.disableNetConnect();
  });

  // Undo the nock setup and delete everything the scraper wrote.
  afterEach(function() {
    nock.cleanAll();
    nock.enableNetConnect();
    fs.removeSync(testDirname);
  });

  it('should decode all html-entities found in html files and not encode entities from css file', function() {
    // Entry page and its stylesheet are served from the fixture directory.
    mockGet('/').replyWithFile(200, mockDirname + '/index.html');
    mockGet('/style.css').replyWithFile(200, mockDirname + '/style.css');

    // Resources expected to be requested because of index.html.
    // Per the test title, the HTML presumably carries these query strings
    // entity-encoded (e.g. `&amp;`) and the scraper has to request the
    // decoded form registered here - confirm against mocks/index.html.
    mockGet('/fonts?family=Myriad&v=2').reply(200, 'fonts.css', {'content-type': 'text/css'});
    // NOTE(review): a .png answered with content-type text/css looks like a
    // copy/paste leftover; kept as in the original - confirm intent.
    mockGet('/style-attr.png?a=1&style-attr.png').reply(200, 'style-attr.png', {'content-type': 'text/css'});
    mockGet('/img.png?a=1&b=2').reply(200, 'img.png');
    mockGet('/?b=2&c=3&d=4').reply(200, 'index_1.html', {'content-type': 'text/html'});

    // Resource expected to be requested because of style.css; the url found
    // in the css file is meant to be used as written ("should stay not decoded").
    // NOTE(review): if mocks/style.css spells this query with `&amp;`, the
    // path below must contain `&amp;` as well - confirm against the fixture.
    mockGet('/external-style.png?v=2&name=external-style.png').reply(200, 'external-style.png');

    var options = {
      urls: [ 'http://example.com/' ],
      directory: testDirname,
      maxDepth: 2,
      recursive: true,
      subdirectories: [
        { directory: 'local', extensions: ['.png', '.css'] }
      ],
      ignoreErrors: false
    };
    var scraper = new Scraper(options);

    // Returning the promise lets the test runner wait for the scrape and
    // report a rejection or a failed assertion as a test failure.
    // NOTE(review): options are handed both to the constructor and to
    // scrape(); confirm whether scrape() actually uses its argument.
    return scraper.scrape(options).then(function() {
      fs.existsSync(testDirname + '/index.html').should.be.eql(true);
      var indexHtml = readSaved('/index.html');

      // Stylesheet linked from the page (fragment has no closing quote on purpose).
      should(indexHtml).containEql('href="local/fonts.css');
      expectSavedWithBody('/local/fonts.css', 'fonts.css');

      // Image referenced from an inline style attribute.
      should(indexHtml).containEql('background: url(\'local/style-attr.png\')');
      expectSavedWithBody('/local/style-attr.png', 'style-attr.png');

      // Image referenced from an <img> tag.
      should(indexHtml).containEql('img src="local/img.png');
      expectSavedWithBody('/local/img.png', 'img.png');

      // Linked html page, saved next to index.html.
      should(indexHtml).containEql('href="index_1.html"');
      expectSavedWithBody('/index_1.html', 'index_1.html');

      fs.existsSync(testDirname + '/local/style.css').should.be.eql(true);
      var styleCss = readSaved('/local/style.css');

      // Image referenced from the external stylesheet.
      should(styleCss).containEql('url(\'external-style.png\')');
      expectSavedWithBody('/local/external-style.png', 'external-style.png');
    });
  });
});
0 commit comments