Skip to content

Commit 26369f9

Browse files
author
Mark Lagendijk
committed
Merge pull request #67 from s0ph1e/createoutput-referential-loop-bug
Remove createOutputObject and return resources as-is
2 parents b6020d2 + 81bc0a8 commit 26369f9

File tree

6 files changed

+18
-58
lines changed

6 files changed

+18
-58
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,10 @@ Makes requests to `urls` and saves all files found with `sources` to `directory`
5959
**callback** - callback function *(optional)*, includes following parameters:
6060

6161
- `error`: if error - `Error` object, if success - `null`
62-
- `result`: if error - `null`, if success - array of objects containing:
62+
- `result`: if error - `null`, if success - array of [Resource](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource.js) objects containing:
6363
- `url`: url of loaded page
6464
- `filename`: filename where page was saved (relative to `directory`)
65-
- `assets`: array of children resources (each of them contains `url`, `filename`, `assets`)
65+
- `children`: array of children Resources
6666

6767
### Filename Generators
6868
The filename generator determines where the scraped files are saved.

lib/scraper.js

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ var _ = require('lodash');
1010

1111
var defaults = require('./config/defaults');
1212
var recursiveSources = require('./config/recursive-sources');
13-
var utils = require('./utils.js');
1413
var Resource = require('./resource');
1514

1615
var getFilenameGenerator = require('./filename-generators/filename-generator-getter');
@@ -128,8 +127,8 @@ Scraper.prototype.prepare = function prepare () {
128127

129128
Scraper.prototype.load = function load () {
130129
var self = this;
131-
return Promise.map(self.originalResources, function loadPage (po) {
132-
return self.loadResource(po).then(utils.createOutputObject);
130+
return Promise.map(self.originalResources, function loadResource (resource) {
131+
return self.loadResource(resource);
133132
});
134133
};
135134

lib/utils.js

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
var url = require('url');
22
var path = require('path');
3-
var _ = require('lodash');
43
var Promise = require('bluebird');
54

65
function isUrl (path) {
@@ -42,29 +41,12 @@ function waitAllFulfilled (promises) {
4241
}));
4342
}
4443

45-
function createOutputObject (resource, outputObjectsByUrl) {
46-
outputObjectsByUrl = outputObjectsByUrl || {};
47-
48-
var outputObject = {
49-
url: resource.getUrl(),
50-
filename: resource.getFilename()
51-
};
52-
outputObjectsByUrl[outputObject.url] = outputObject;
53-
54-
outputObject.assets = _.map(resource.getChildren(), function getOrCreateChildOutputObject (childResource){
55-
return outputObjectsByUrl[childResource.getUrl()] || createOutputObject(childResource, outputObjectsByUrl);
56-
});
57-
58-
return outputObject;
59-
}
60-
6144
module.exports = {
6245
isUrl: isUrl,
6346
getUrl: getUrl,
6447
getUnixPath: getUnixPath,
6548
getRelativePath: getRelativePath,
6649
getFilenameFromUrl: getFilenameFromUrl,
6750
getHashFromUrl: getHashFromUrl,
68-
waitAllFulfilled: waitAllFulfilled,
69-
createOutputObject: createOutputObject
51+
waitAllFulfilled: waitAllFulfilled
7052
};

test/functional/base-test.js

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ var nock = require('nock');
33
var fs = require('fs-extra');
44
var cheerio = require('cheerio');
55
var scraper = require('../../index');
6+
var Resource = require('../../lib/resource');
67

78
var testDirname = __dirname + '/.base';
89
var mockDirname = __dirname + '/mocks/base';
@@ -68,16 +69,19 @@ describe('Functional base', function() {
6869
result.should.be.instanceOf(Array).and.have.length(3);
6970

7071
result[0].should.have.properties({ url: 'http://example.com/', filename: 'index.html' });
71-
result[0].should.have.properties('assets');
72-
result[0].assets.should.be.instanceOf(Array).and.have.length(4);
72+
result[0].should.have.properties('children');
73+
result[0].children.should.be.instanceOf(Array).and.have.length(4);
74+
result[0].children[0].should.be.instanceOf(Resource);
7375

7476
result[1].should.have.properties({ url: 'http://example.com/about', filename: 'about.html' });
75-
result[1].should.have.properties('assets');
76-
result[1].assets.should.be.instanceOf(Array).and.have.length(4);
77+
result[1].should.have.properties('children');
78+
result[1].children.should.be.instanceOf(Array).and.have.length(4);
79+
result[1].children[0].should.be.instanceOf(Resource);
7780

7881
result[2].should.have.properties({ url: 'http://blog.example.com/', filename: 'blog.html' }); // url after redirect
79-
result[2].should.have.properties('assets');
80-
result[2].assets.should.be.instanceOf(Array).and.have.length(1);
82+
result[2].should.have.properties('children');
83+
result[2].children.should.be.instanceOf(Array).and.have.length(1);
84+
result[2].children[0].should.be.instanceOf(Resource);
8185

8286
// should create directory and subdirectories
8387
fs.existsSync(testDirname).should.be.eql(true);

test/unit/scraper-test.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ describe('Scraper', function () {
210210
}).catch(done);
211211
});
212212

213-
it('should return array of objects with url, filename and assets', function(done) {
213+
it('should return array of objects with url, filename and childre', function(done) {
214214
nock('http://first-url.com').get('/').reply(200, 'OK');
215215
nock('http://second-url.com').get('/').reply(500);
216216

@@ -225,8 +225,8 @@ describe('Scraper', function () {
225225
s.prepare().bind(s).then(s.load).then(function(res) {
226226
res.should.be.instanceOf(Array);
227227
res.should.have.length(2);
228-
res[0].should.have.properties(['url', 'filename', 'assets']);
229-
res[1].should.have.properties(['url', 'filename', 'assets']);
228+
res[0].should.be.instanceOf(Resource).and.have.properties(['url', 'filename', 'children']);
229+
res[1].should.be.instanceOf(Resource).and.have.properties(['url', 'filename', 'children']);
230230
done();
231231
}).catch(done);
232232
});

test/unit/utils-test.js

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -83,29 +83,4 @@ describe('Common utils', function () {
8383
utils.getRelativePath('css/1.css', 'css/2.css').should.be.equal('2.css');
8484
});
8585
});
86-
87-
describe('#createOutputObject', function () {
88-
it('should create output object recursively', function() {
89-
var root = new Resource('http://google.com', 'google.html');
90-
root.createChild('http://child-one.com', 'child.html');
91-
root.createChild('http://child-two.com', 'child2.html');
92-
93-
var expected = {
94-
url: 'http://google.com',
95-
filename: 'google.html',
96-
assets: [{
97-
url: 'http://child-one.com',
98-
filename: 'child.html',
99-
assets: []
100-
}, {
101-
url: 'http://child-two.com',
102-
filename: 'child2.html',
103-
assets: []
104-
}]
105-
};
106-
107-
var result = utils.createOutputObject(root);
108-
result.should.eql(expected);
109-
});
110-
});
11186
});

0 commit comments

Comments
 (0)