Skip to content

Commit 4b92006

Browse files
authored
Add saved and error callbacks (#190)
1 parent 423792d commit 4b92006

File tree

4 files changed

+86
-4
lines changed

4 files changed

+86
-4
lines changed

README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ scrape(options, (error, result) => {
5858
* [urlFilter](#urlfilter) - skip some urls
5959
* [filenameGenerator](#filenamegenerator) - generate filename for downloaded resource
6060
* [httpResponseHandler](#httpresponsehandler) - customize http response handling
61+
* [onResourceSaved](#onresourcesaved) - callback called when a resource is saved
62+
* [onResourceError](#onresourceerror) - callback called when downloading, handling, or saving a resource fails
6163

6264
You can find the default options in [lib/config/defaults.js](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/config/defaults.js).
6365

@@ -209,6 +211,30 @@ scrape({
209211
```
210212
The scrape function resolves with an array of [Resource](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource.js) objects, which contain the `metadata` property from `httpResponseHandler`.
211213

214+
#### onResourceSaved
215+
Function called each time a resource is saved to the file system. The callback is called with the [Resource](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource.js) object. Defaults to `null` - no callback will be called.
216+
```javascript
217+
scrape({
218+
urls: ['http://example.com/'],
219+
directory: '/path/to/save',
220+
onResourceSaved: (resource) => {
221+
console.log(`Resource ${resource} was saved to fs`);
222+
}
223+
})
224+
```
225+
226+
#### onResourceError
227+
Function called each time downloading, handling, or saving a resource to the file system fails. The callback is called with the [Resource](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource.js) object and the `Error` object. Defaults to `null` - no callback will be called.
228+
```javascript
229+
scrape({
230+
urls: ['http://example.com/'],
231+
directory: '/path/to/save',
232+
onResourceError: (resource, err) => {
233+
console.log(`Resource ${resource} was not saved because of ${err}`);
234+
}
235+
})
236+
```
237+
212238
## callback
213239
Optional callback function, which receives the following parameters:
214240
- `error`: if error - `Error` object, if success - `null`

lib/config/defaults.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ var config = {
3434
recursive: false,
3535
maxDepth: null,
3636
ignoreErrors: true,
37-
httpResponseHandler: null
37+
httpResponseHandler: null,
38+
onResourceSaved: null,
39+
onResourceError: null
3840
};
3941

4042
module.exports = config;

lib/scraper.js

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,13 @@ Scraper.prototype.saveResource = function saveResource (resource) {
6767
}).then(function fileHandled () {
6868
logger.info('saving resource ' + resource + ' to fs');
6969
return self.fsAdapter.saveResource(resource);
70+
}).then(function afterResourceSaved () {
71+
if (self.options.onResourceSaved) {
72+
self.options.onResourceSaved(resource);
73+
}
7074
}).catch(function handleError (err) {
7175
logger.warn('failed to save resource ' + resource);
72-
return self.handleError(err);
76+
return self.handleError(err, resource);
7377
});
7478
};
7579

@@ -113,7 +117,7 @@ Scraper.prototype.createNewRequest = function createNewRequest (resource) {
113117
return resource;
114118
}).catch(function handleError (err) {
115119
logger.warn('failed to request resource ' + resource);
116-
return self.handleError(err);
120+
return self.handleError(err, resource);
117121
});
118122

119123
self.requestedResourcePromises.set(url, requestPromise);
@@ -169,7 +173,10 @@ Scraper.prototype.waitForLoad = function waitForLoad () {
169173
return Promise.resolve(self.originalResources);
170174
};
171175

172-
Scraper.prototype.handleError = function handleError (err) {
176+
Scraper.prototype.handleError = function handleError (err, resource) {
177+
if (resource && this.options.onResourceError) {
178+
this.options.onResourceError(resource, err);
179+
}
173180
if (this.options.ignoreErrors) {
174181
logger.warn('ignoring error: ' + err.message);
175182
return Promise.resolve(null);
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
const should = require('should');
2+
const nock = require('nock');
3+
const fs = require('fs-extra');
4+
const sinon = require('sinon');
5+
const scrape = require('../../../index');
6+
7+
const testDirname = __dirname + '/.tmp';
8+
const mockDirname = __dirname + '/mocks';
9+
10+
describe('Functional callbacks', () => {
11+
12+
beforeEach(() => {
13+
nock.cleanAll();
14+
nock.disableNetConnect();
15+
});
16+
17+
afterEach(() => {
18+
nock.cleanAll();
19+
nock.enableNetConnect();
20+
fs.removeSync(testDirname);
21+
});
22+
23+
it('should call onResourceSaved callback and onResourceError callback', function() {
24+
nock('http://example.com/').get('/').reply(200, 'OK');
25+
nock('http://nodejs.org/').get('/').replyWithError('REQUEST ERROR!!');
26+
27+
const resourceSavedStub = sinon.stub();
28+
const resourceErrorStub = sinon.stub();
29+
30+
const options = {
31+
urls: [ 'http://example.com/', 'http://nodejs.org/' ],
32+
directory: testDirname,
33+
subdirectories: null,
34+
onResourceSaved: resourceSavedStub,
35+
onResourceError: resourceErrorStub
36+
};
37+
38+
return scrape(options).then(function() {
39+
should(resourceSavedStub.calledOnce).be.eql(true);
40+
should(resourceSavedStub.args[0][0].url).be.eql('http://example.com/');
41+
42+
should(resourceErrorStub.calledOnce).be.eql(true);
43+
should(resourceErrorStub.args[0][0].url).be.eql('http://nodejs.org/');
44+
should(resourceErrorStub.args[0][1].message).be.eql('REQUEST ERROR!!');
45+
});
46+
});
47+
});

0 commit comments

Comments
 (0)