Skip to content

Commit 3b8a025

Browse files
authored
Add custom resource saver (#201)
1 parent cbc9f00 commit 3b8a025

File tree

16 files changed

+428
-296
lines changed

16 files changed

+428
-296
lines changed

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ node_js:
44
- '4'
55
- '5'
66
- '6'
7+
- '7'
78
after_success:
89
- codeclimate-test-reporter < coverage/lcov.info
910
- coveralls < coverage/lcov.info

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ scrape(options, (error, result) => {
5858
* [urlFilter](#urlfilter) - skip some urls
5959
* [filenameGenerator](#filenamegenerator) - generate filename for downloaded resource
6060
* [httpResponseHandler](#httpresponsehandler) - customize http response handling
61+
* [resourceSaver](#resourcesaver) - customize resources saving
6162
* [onResourceSaved](#onresourcesaved) - callback called when resource is saved
6263
* [onResourceError](#onresourceerror) - callback called when downloading a resource fails
6364

@@ -211,6 +212,19 @@ scrape({
211212
```
212213
Scrape function resolves with array of [Resource](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource.js) objects which contain `metadata` property from `httpResponseHandler`.
213214

215+
#### resourceSaver
216+
Class that saves [Resources](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource.js). It must implement the methods `saveResource` and `errorCleanup`, both of which return Promises. Use it to save files wherever you need: Dropbox, Amazon S3, an existing directory, etc. By default, all files are saved to the local file system, in a new directory given by the `directory` option (see [lib/resource-saver/index.js](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource-saver/index.js)).
217+
```javascript
218+
scrape({
219+
urls: ['http://example.com/'],
220+
directory: '/path/to/save',
221+
resourceSaver: class MyResourceSaver {
222+
saveResource (resource) {/* code to save file where you need */}
223+
errorCleanup (err) {/* code to remove all previously saved files in case of error */}
224+
}
225+
}).then(console.log).catch(console.log);
226+
```
227+
214228
#### onResourceSaved
215229
Function called each time a resource is saved to the file system. The callback is called with a [Resource](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource.js) object. Defaults to `null` - no callback will be called.
216230
```javascript

appveyor.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ environment:
33
- nodejs_version: "4"
44
- nodejs_version: "5"
55
- nodejs_version: "6"
6+
- nodejs_version: "7"
67

78
install:
89
- ps: Install-Product node $env:nodejs_version

index.js

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1-
var Scraper = require('./lib/scraper.js');
1+
'use strict';
22

3-
module.exports = function scrape (options, callback) {
4-
return new Scraper(options).scrape(callback);
3+
const Promise = require('bluebird');
4+
const Scraper = require('./lib/scraper.js');
5+
6+
module.exports = (options, callback) => {
7+
return Promise.try(() => {
8+
return new Scraper(options).scrape(callback);
9+
});
510
};
611

712
module.exports.defaults = Scraper.defaults;

lib/config/defaults.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
var config = {
1+
'use strict';
2+
const config = {
23
filenameGenerator: 'byType',
34
defaultFilename: 'index.html',
45
prettifyUrls: false,
@@ -51,7 +52,8 @@ var config = {
5152
ignoreErrors: true,
5253
httpResponseHandler: null,
5354
onResourceSaved: null,
54-
onResourceError: null
55+
onResourceError: null,
56+
resourceSaver: null
5557
};
5658

5759
module.exports = config;

lib/fs-adaper.js

Lines changed: 0 additions & 59 deletions
This file was deleted.

lib/resource-saver/index.js

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
'use strict';
2+
3+
const path = require('path');
4+
const _ = require('lodash');
5+
const Promise = require('bluebird');
6+
7+
const fs = require('fs-extra');
8+
const outputFileAsync = Promise.promisify(fs.outputFile);
9+
const removeAsync = Promise.promisify(fs.remove);
10+
11+
const supportedOptions = [ 'directory' ];
12+
13+
/**
 * Resource saver that writes every resource to the local file system,
 * inside a directory that must not exist yet when the saver is created.
 */
class ResourceSaver {
	/**
	 * @param {Object} options - scraper options; only `directory` is picked
	 * @param {string} options.directory - target directory, resolved against process.cwd()
	 * @throws {Error} when `directory` is empty or not a string, or when the resolved directory already exists
	 */
	constructor (options) {
		this.options = _.pick(options, supportedOptions);
		const directory = this.options.directory;

		if (!directory || typeof directory !== 'string') {
			throw new Error(`Incorrect directory ${directory}`);
		}

		this.absoluteDirectoryPath = path.resolve(process.cwd(), directory);

		// Refuse to write into an existing directory: errorCleanup removes the
		// whole directory, so it must contain only files written by this saver.
		if (exists(this.absoluteDirectoryPath)) {
			throw new Error(`Directory ${this.absoluteDirectoryPath} exists`);
		}

		// Resources that were written successfully; consulted by errorCleanup.
		this.loadedResources = [];
	}

	/**
	 * Save resource to file system
	 * @param {Resource} resource - its filename is joined onto the target directory
	 * @returns {Promise} resolved once the file is written and the resource is recorded as saved
	 */
	saveResource (resource) {
		const filename = path.join(this.absoluteDirectoryPath, resource.getFilename());
		const text = resource.getText();
		return outputFileAsync(filename, text, { encoding: 'binary' }).then(() => {
			this.loadedResources.push(resource);
		});
	}

	/**
	 * Remove all files that were saved before
	 * @returns {Promise} resolved after the target directory is removed, or immediately when nothing was saved
	 */
	errorCleanup () {
		if (_.isEmpty(this.loadedResources)) {
			return Promise.resolve();
		}
		return removeAsync(this.absoluteDirectoryPath);
	}
}
54+
55+
/**
 * Synchronously check whether something exists at the given path.
 * @param {string} targetPath - path to check
 * @returns {boolean} true when stat succeeds, false when stat fails with ENOENT
 * @throws {Error} any stat error other than ENOENT is rethrown unchanged
 */
function exists (targetPath) {
	try {
		// statSync either returns a Stats object or throws, so reaching the
		// next line means the path exists.
		fs.statSync(targetPath);
		return true;
	} catch (err) {
		if (err.code === 'ENOENT') {
			return false;
		}
		throw err;
	}
}
71+
72+
module.exports = ResourceSaver;

0 commit comments

Comments
 (0)