Skip to content

Commit 575cd29

Browse files
committed
Moved function out of loop
1 parent e2a15a3 commit 575cd29

File tree

1 file changed

+120
-133
lines changed

1 file changed

+120
-133
lines changed

spider4/spider.js

Lines changed: 120 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/*jshint node: true */
33
/*jshint esversion: 6 */
44
/*jshint latedef: false */
5+
/* jshint expr: true */
56
"use strict";
67
const request = require('request');
78
const fs = require('fs');
@@ -11,158 +12,144 @@ const utilities = require('./utilities');
1112
const debug = require('debug')('spider');
1213
debug.enabled = false;
1314
var downloaded = false;
14-
var spidering = new Map();
15+
var spidering = new Map();
1516
var errors = [];
16-
var url,concurrency;
17-
function saveFile(filename,body,callback)
18-
{
19-
mkdirp(path.dirname(filename), err => {
17+
var url, concurrency;
18+
19+
function saveFile(filename, body, callback) {
20+
mkdirp(path.dirname(filename), err => {
21+
if (err)
22+
return callback(err);
23+
fs.writeFile(filename, body, err => {
2024
if (err)
21-
return callback(err);
22-
fs.writeFile(filename, body, err => {
23-
if (err)
24-
return callback(err);
25-
return callback(null, filename, true);
26-
});
25+
return callback(err);
26+
return callback(null, filename, true);
27+
});
2728
});
2829
}
2930

30-
function download(url,filename,callback)
31-
{
32-
console.log(`Downloading ${url} to ${filename}`);
33-
request(url, (err, response, body) => { if (err)
34-
return callback(err);
35-
saveFile(filename,body,err => {
36-
console.log("Downloaded and saved " + url + " to ${filename}");
37-
if (err)
38-
return callback(err);
39-
callback(null,body);
31+
function download(url, filename, callback) {
32+
console.log(`Downloading ${url} to ${filename}`);
33+
request(url, (err, response, body) => {
34+
if (err)
35+
return callback(err);
36+
saveFile(filename, body, err => {
37+
console.log(`Downloaded and saved ${url} to ${filename}`);
38+
if (err)
39+
return callback(err);
40+
callback(null, body);
41+
});
4042
});
41-
});
42-
downloaded = true;
43+
downloaded = true;
4344
}
4445

45-
function spider( url, nesting, callback)
46-
{
47-
if (spidering.has(url))
48-
return process.nextTick(callback);
49-
spidering.set(url,true);
50-
const filename = utilities.urlToFilename( url);
51-
fs.readFile( filename, 'utf8', function( err, body) {
52-
if (err) {
53-
if (err.code !== 'ENOENT')
54-
return callback( err,filename,false);
55-
return download( url, filename, function( err, body) {
56-
if( err)
57-
return callback( err,filename,false);
58-
spiderLinks( url, body, nesting, callback);
59-
60-
});
61-
}
62-
63-
spiderLinks( url, body, nesting, callback);
64-
65-
66-
});
46+
function spider(url, nesting, callback) {
47+
if (spidering.has(url))
48+
return process.nextTick(callback);
49+
spidering.set(url, true);
50+
const filename = utilities.urlToFilename(url);
51+
fs.readFile(filename, 'utf8', function(err, body) {
52+
if (err) {
53+
if (err.code !== 'ENOENT')
54+
return callback(err, filename, false);
55+
return download(url, filename, function(err, body) {
56+
if (err)
57+
return callback(err, filename, false);
58+
spiderLinks(url, body, nesting, callback);
59+
60+
});
61+
}
62+
63+
spiderLinks(url, body, nesting, callback);
64+
65+
66+
});
6767
}
68-
69-
function spiderLinks( currentUrl, body, nesting, callback) {
70-
if( nesting === 0)
71-
return process.nextTick( callback,null,currentUrl,downloaded);
72-
73-
var links = utilities.getPageLinks( currentUrl, body);
74-
if (links.length === 0)
75-
return process.nextTick(callback,null,currentUrl,downloaded);
76-
let completed = 0;
77-
let running = 0;
78-
let index = 0;
79-
let inError = false;
80-
let error = null;
81-
function done( err)
82-
{
83-
if( err)
84-
{
85-
inError = true;
86-
return callback( err);
87-
}
88-
return callback(null,url,downloaded);
89-
}
90-
function next() {
91-
while( running < concurrency && index < links.length) {
92-
const link = links[index++];
93-
94-
spider(link,nesting-1, function(err) {
95-
if (err)
96-
{
97-
inError = true;
98-
error = err;
99-
return callback(err);
100-
}
101-
if ( completed === links.length && !inError)
102-
return done();
103-
completed++, running--;next();
104-
});
105-
running++;
106-
}
107-
if ( completed === links.length && !inError)
108-
return done();
109-
}
110-
next();
11168

69+
function spiderLinks(currentUrl, body, nesting, callback) {
70+
if (nesting === 0)
71+
return process.nextTick(callback, null, currentUrl, downloaded);
72+
73+
var links = utilities.getPageLinks(currentUrl, body);
74+
if (links.length === 0)
75+
return process.nextTick(callback, null, currentUrl, downloaded);
76+
let completed = 0;
77+
let running = 0;
78+
let index = 0;
79+
let inError = false;
80+
let error = null;
81+
82+
function done(err) {
83+
if (err) {
84+
inError = true;
85+
return callback(err);
86+
}
87+
return callback(null, url, downloaded);
88+
}
89+
90+
function spidered(err) {
91+
if (err) {
92+
inError = true;
93+
error = err;
94+
return callback(err);
95+
}
96+
if (completed === links.length && !inError)
97+
return done();
98+
completed++, running--;
99+
next();
100+
}
101+
102+
function next() {
103+
while (running < concurrency && index < links.length) {
104+
const link = links[index++];
105+
spider(link, nesting - 1, spidered);
106+
running++;
107+
}
108+
if (completed === links.length && !inError)
109+
return done();
110+
}
111+
next();
112112
}
113113

114-
function exitMessage()
115-
{
114+
function exitMessage() {
116115
console.error('Usage: node spider.js url {level} {concurrency}.\nLevel defaults to 1.\nConcurrency defaults to 2.');
117116
process.exit(1);
118117
}
119118

120119
url = process.argv[2];
121120
var level;
122-
if (process.argv[3])
123-
{
124-
level = parseInt(process.argv[3]);
125-
126-
if (isNaN(level) || level <= 0)
127-
exitMessage();
128-
}
129-
else
130-
level = 1;
131-
if (process.argv[4])
132-
{
133-
concurrency = parseInt(process.argv[4]);
134-
135-
if (isNaN(level) || concurrency <= 0)
136-
exitMessage();
137-
}
138-
else
139-
concurrency = 2;
140-
if (url)
141-
{
142-
spider(url, level,(err, filename, downloaded) => {
143-
144-
if (err) {
145-
console.log(err);
146-
errors.push(err);
147-
} else if (downloaded) {
148-
console.log(`Completed the download of "${url}"`);
149-
150-
} else {
151-
console.log(`"${url}" has already been downloaded`);
152-
}
153-
if (errors.length)
154-
{
155-
console.log("Check errors. Redownload if necessary.");
156-
errors.forEach(function (error)
157-
{
158-
console.log(error.name + ':' + error.message);
159-
});
160-
}
161-
});
162-
}
163-
else
164-
exitMessage();
121+
if (process.argv[3]) {
122+
level = parseInt(process.argv[3]);
165123

124+
if (isNaN(level) || level <= 0)
125+
exitMessage();
126+
} else
127+
level = 1;
128+
if (process.argv[4]) {
129+
concurrency = parseInt(process.argv[4]);
166130

131+
if (isNaN(level) || concurrency <= 0)
132+
exitMessage();
133+
} else
134+
concurrency = 2;
135+
if (url) {
136+
spider(url, level, (err, filename, downloaded) => {
167137

138+
if (err) {
139+
console.log(err);
140+
errors.push(err);
141+
} else if (downloaded) {
142+
console.log(`Completed the download of "${url}"`);
168143

144+
} else {
145+
console.log(`"${url}" has already been downloaded`);
146+
}
147+
if (errors.length) {
148+
console.log("Check errors. Redownload if necessary.");
149+
errors.forEach(function(error) {
150+
console.log(error.name + ':' + error.message);
151+
});
152+
}
153+
});
154+
} else
155+
exitMessage();

0 commit comments

Comments
 (0)