Skip to content

Commit 0d1ba41

Browse files
committed
Use fs.stat instead of fs.exists, add tests for maxDepth option
1 parent 64d98e8 commit 0d1ba41

File tree

4 files changed

+159
-6
lines changed

4 files changed

+159
-6
lines changed

lib/scraper.js

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
var Promise = require('bluebird');
22

33
var fs = require('fs-extra');
4-
var existsAsync = Promise.promisify(fs.exists);
4+
var existsAsync = Promise.promisify(fs.stat);
55
var outputFileAsync = Promise.promisify(fs.outputFile);
66
var ensureDirAsync = Promise.promisify(fs.ensureDir);
77

@@ -86,7 +86,7 @@ Scraper.prototype.getResourceHandler = function getHandler (resource) {
8686
var self = this;
8787
var type = resource.getType();
8888
var depth = resource.getDepth();
89-
var depthGreaterThanMax = self.options.maxDepth && depth > self.options.maxDepth;
89+
var depthGreaterThanMax = self.options.maxDepth && depth >= self.options.maxDepth;
9090

9191
switch (true) {
9292
case depthGreaterThanMax: return _.noop;
@@ -130,8 +130,10 @@ Scraper.prototype.loadResource = function loadResource (resource) {
130130

131131
Scraper.prototype.validate = function validate () {
132132
var dir = this.options.directory;
133-
return existsAsync(dir).then(function validateDirectoryExists (exists) {
134-
return exists ? Promise.reject(new Error('Path ' + dir + ' exists')) : Promise.resolve();
133+
return existsAsync(dir).then(function handleDirectoryExist () {
134+
return Promise.reject(new Error('Path ' + dir + ' exists'));
135+
}, function handleDirectoryNotExist () {
136+
return Promise.resolve();
135137
});
136138
};
137139

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@
4444
"istanbul": "^0.4.0",
4545
"mocha": "^2.2.5",
4646
"nock": "^2.9.1",
47+
"proxyquire": "^1.7.3",
4748
"should": "^7.0.2",
48-
"sinon": "^1.15.4"
49+
"sinon": "^1.15.4",
50+
"sinon-as-promised": "^4.0.0"
4951
}
5052
}

test/functional/recursive-test.js

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ describe('Functional recursive downloading', function() {
2121
fs.removeSync(testDirname);
2222
});
2323

24-
it('should follow anchors', function(done) {
24+
it('should follow anchors if recursive flag is set', function(done) {
2525
var options = {
2626
urls: [ 'http://example.com/' ],
2727
directory: testDirname,
@@ -52,4 +52,68 @@ describe('Functional recursive downloading', function() {
5252
done();
5353
}).catch(done);
5454
});
55+
56+
it('should follow anchors with depth < maxDepth if recursive flag and maxDepth are set', function(done) {
57+
var options = {
58+
urls: [ 'http://example.com/' ],
59+
directory: testDirname,
60+
subdirectories: null,
61+
sources: [],
62+
recursive: true,
63+
maxDepth: 1
64+
};
65+
66+
nock('http://example.com/').get('/').replyWithFile(200, mockDirname + '/index.html');
67+
68+
// mock for anchors
69+
nock('http://example.com/').get('/about.html').replyWithFile(200, mockDirname + '/about.html');
70+
nock('http://example.com/').get('/link1.html').reply(200, 'content 1');
71+
nock('http://example.com/').get('/link2.html').reply(200, 'content 2');
72+
nock('http://example.com/').get('/link3.html').reply(200, 'content 3');
73+
74+
scraper.scrape(options).then(function() {
75+
fs.existsSync(testDirname + '/index.html').should.be.eql(true);
76+
77+
// index.html anchors loaded
78+
fs.existsSync(testDirname + '/about.html').should.be.eql(true);
79+
80+
// about.html anchors not loaded (depth would exceed maxDepth)
81+
fs.existsSync(testDirname + '/link1.html').should.be.eql(false);
82+
fs.existsSync(testDirname + '/link2.html').should.be.eql(false);
83+
fs.existsSync(testDirname + '/link3.html').should.be.eql(false);
84+
85+
done();
86+
}).catch(done);
87+
});
88+
89+
it('should not follow anchors if recursive flag is not set', function(done) {
90+
var options = {
91+
urls: [ 'http://example.com/' ],
92+
directory: testDirname,
93+
subdirectories: null,
94+
sources: []
95+
};
96+
97+
nock('http://example.com/').get('/').replyWithFile(200, mockDirname + '/index.html');
98+
99+
// mock for anchors
100+
nock('http://example.com/').get('/about.html').replyWithFile(200, mockDirname + '/about.html');
101+
nock('http://example.com/').get('/link1.html').reply(200, 'content 1');
102+
nock('http://example.com/').get('/link2.html').reply(200, 'content 2');
103+
nock('http://example.com/').get('/link3.html').reply(200, 'content 3');
104+
105+
scraper.scrape(options).then(function() {
106+
fs.existsSync(testDirname + '/index.html').should.be.eql(true);
107+
108+
// index.html anchors not loaded (recursive flag not set)
109+
fs.existsSync(testDirname + '/about.html').should.be.eql(false);
110+
111+
// about.html anchors not loaded (recursive flag not set)
112+
fs.existsSync(testDirname + '/link1.html').should.be.eql(false);
113+
fs.existsSync(testDirname + '/link2.html').should.be.eql(false);
114+
fs.existsSync(testDirname + '/link3.html').should.be.eql(false);
115+
116+
done();
117+
}).catch(done);
118+
});
55119
});

test/unit/scraper-test.js

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
var should = require('should');
22
var sinon = require('sinon');
3+
require('sinon-as-promised');
34
var nock = require('nock');
5+
var proxyquire = require('proxyquire');
46
var fs = require('fs-extra');
57
var path = require('path');
68
var _ = require('underscore');
@@ -552,7 +554,90 @@ describe('Scraper', function () {
552554
});
553555

554556
describe('#getResourceHandler', function() {
557+
var Scraper;
558+
var noopStub;
559+
var cssLoadStub;
560+
var htmlLoadStub;
561+
562+
beforeEach(function() {
563+
noopStub = sinon.stub().resolves();
564+
cssLoadStub = sinon.stub().resolves();
565+
htmlLoadStub = sinon.stub().resolves();
566+
567+
Scraper = proxyquire('../../lib/scraper', {
568+
'underscore': {
569+
'noop': noopStub
570+
},
571+
'./file-handlers/html': htmlLoadStub,
572+
'./file-handlers/css': cssLoadStub
573+
});
574+
});
575+
576+
it('should return noop if resource has depth > max', function(done) {
577+
var s = new Scraper({
578+
urls: 'http://example.com',
579+
directory: testDirname,
580+
maxDepth: 2
581+
});
582+
583+
s.prepare().then(function() {
584+
var r = new Resource('http://example.com/');
585+
sinon.stub(r, 'getType').returns('html');
586+
sinon.stub(r, 'getDepth').returns(10);
587+
588+
s.getResourceHandler(r).call(s, r).then(function() {
589+
noopStub.called.should.be.eql(true);
590+
cssLoadStub.called.should.be.eql(false);
591+
htmlLoadStub.called.should.be.eql(false);
592+
593+
done();
594+
});
595+
}).catch(done);
596+
});
597+
598+
it('should return css loader if file has css type', function(done) {
599+
var s = new Scraper({
600+
urls: 'http://example.com',
601+
directory: testDirname,
602+
maxDepth: 2
603+
});
555604

605+
s.prepare().then(function() {
606+
var r = new Resource('http://example.com/');
607+
sinon.stub(r, 'getType').returns('css');
608+
sinon.stub(r, 'getDepth').returns(1);
609+
610+
s.getResourceHandler(r).call(s, r).then(function() {
611+
noopStub.called.should.be.eql(false);
612+
cssLoadStub.called.should.be.eql(true);
613+
htmlLoadStub.called.should.be.eql(false);
614+
615+
done();
616+
});
617+
}).catch(done);
618+
});
619+
620+
it('should return html & css loader if file has html type', function(done) {
621+
var s = new Scraper({
622+
urls: 'http://example.com',
623+
directory: testDirname,
624+
maxDepth: 2
625+
});
626+
627+
s.prepare().then(function() {
628+
var r = new Resource('http://example.com/');
629+
sinon.stub(r, 'getType').returns('html');
630+
sinon.stub(r, 'getDepth').returns(1);
631+
632+
s.getResourceHandler(r).call(s, r).then(function() {
633+
noopStub.called.should.be.eql(false);
634+
cssLoadStub.called.should.be.eql(true);
635+
htmlLoadStub.called.should.be.eql(true);
636+
637+
done();
638+
});
639+
}).catch(done);
640+
});
556641
});
557642

558643
describe('#scrape', function() {

0 commit comments

Comments
 (0)