
Commit 9b61a69

Rewrote and refactored validator.js
1 parent a38d593 commit 9b61a69

4 files changed (+127 −118 lines)


spider5/cmdconfig.js

Lines changed: 27 additions & 27 deletions
@@ -5,57 +5,57 @@

 function fullURL(url)
 {
-    if (!url.startsWith('http'))
-        return 'http://' + url;
+    if (!url.startsWith("http"))
+        return "http://" + url;
     return url;
 }

 const cmdOptions = [
-    { name: 'help',
-      description: 'Display usage guide.',
-      alias: 'h',
+    { name: "help",
+      description: "Display usage guide.",
+      alias: "h",
       type: Boolean,
-      group: 'main'},
-    { name: 'nesting',
+      group: "main"},
+    { name: "nesting",
       type: Number,
-      alias: 'n',
-      group: 'main',
-      description: 'Depth to which the starting url is to be traversed'},
-    { name: 'concurrency',
+      alias: "n",
+      group: "main",
+      description: "Depth to which the starting url is to be traversed"},
+    { name: "concurrency",
       type: Number,
-      alias: 'c',
-      group: 'main',
-      description: 'Number of concurrent requests' },
-    { name: 'url',
+      alias: "c",
+      group: "main",
+      description: "Number of concurrent requests" },
+    { name: "url",
       type: url => fullURL(url),
-      alias: 'u',
+      alias: "u",
       defaultOption: true,
-      group: 'main',
-      description:'Url to be traversed'}
+      group: "main",
+      description:"Url to be traversed"}
 ];


 const sections = [
     {
-        header: 'Webcrawler app',
-        content: 'Crawls the url provided and downloads page links recursively to the nesting level specified.'
+        header: "Webcrawler app",
+        content: "Crawls the url provided and downloads page links recursively to the nesting level specified."
     },
     {
-        header: 'Main options',
+        header: "Main options",
         optionList: cmdOptions,
-        group: [ 'main' ]
+        group: [ "main" ]
     },
     {
-        header: 'Misc',
+        header: "Misc",
         optionList: cmdOptions,
-        group: '_none'
+        group: "_none"
     }
 ];

-const config = require('config');
-const commandLineArgs = require('command-line-args');
+const config = require("config");
+const commandLineArgs = require("command-line-args");
 const options = commandLineArgs(cmdOptions);
-const commandLineUsage = require('command-line-usage');
+const commandLineUsage = require("command-line-usage");
 const usage = commandLineUsage(sections);
 // The command line options
 module.exports.options = options;
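Note: spider.js (below) also calls cmdConfig.get(name, fallback) and reads cmdConfig.usage, both defined outside the hunk shown above. Purely for orientation, a minimal sketch of what such a helper could look like, assuming the "config" package is used for file-based defaults (an assumption; the actual code in this module may differ):

    // Hypothetical sketch only -- not part of this commit's hunk.
    // Assumed resolution order: command-line option, then config file, then caller fallback.
    module.exports.usage = usage;
    module.exports.get = function get(name, fallback) {
        if (options._all[name] !== undefined) {
            return options._all[name];   // value supplied on the command line
        }
        if (config.has(name)) {
            return config.get(name);     // value from the "config" package's files
        }
        return fallback;                 // default supplied by the caller
    };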

spider5/spider.js

Lines changed: 29 additions & 21 deletions
@@ -4,25 +4,23 @@
 /*jshint latedef: false */
 "use strict";

-const request = require('request');
-const fs = require('fs');
-const mkdirp = require('mkdirp');
-const path = require('path');
-const utilities = require('./utilities');
-const cmdConfig = require('./cmdconfig');
-const TaskQueue = require('./taskQueue');
-const validator = require('./validator');
+const request = require("request");
+const fs = require("fs");
+const mkdirp = require("mkdirp");
+const path = require("path");
+const utilities = require("./utilities");
+const cmdConfig = require("./cmdconfig");
+const TaskQueue = require("./taskQueue");
+const validator = require("./validator");

 function spiderLinks(currentUrl, body, nesting, callback) {
-    if(nesting === 0) {
+    if(nesting === 0)
         return process.nextTick(callback);
-    }

     const links = utilities.getPageLinks(currentUrl, body);
-    if(links.length === 0) {
+    if(links.length === 0)
         return process.nextTick(callback);
-    }
-    let downloadQueue = new TaskQueue(cmdConfig.get('concurrency',2));
+    let downloadQueue = new TaskQueue(cmdConfig.get("concurrency",2));

     let completed = 0, hasErrors = false;
     links.forEach(link => {
@@ -74,9 +72,9 @@ function spider(url, nesting, callback) {
     spidering.set(url, true);

     const filename = utilities.urlToFilename(url);
-    fs.readFile(filename, 'utf8', function(err, body) {
+    fs.readFile(filename, "utf8", function(err, body) {
         if(err) {
-            if(err.code !== 'ENOENT') {
+            if(err.code !== "ENOENT") {
                 return callback(err);
             }

@@ -93,14 +91,24 @@ function spider(url, nesting, callback) {
     });
 }

-if (!validator.validate())
-    process.exit();
+const errors = validator.validate();
+if (errors.length || cmdConfig.get("help"))
+{
+    console.log(cmdConfig.usage);
+    errors.forEach((err) =>
+    {
+        console.error(err);
+    });
+    process.exit(errors.length);
+}

-spider(cmdConfig.get('url'), cmdConfig.get('nesting',1), (err) => {
+spider(cmdConfig.get("url"), cmdConfig.get("nesting",1), (err) => {
     if(err) {
-        console.log(err);
-        process.exit();
+        console.error(err);
+        process.exit(1);
     } else {
-        console.log('Download complete');
+        console.log("Download complete");
+        process.exit(0);
     }
 });
+
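For context, taskQueue.js is required above but is not part of this diff. A minimal concurrency-limited queue that would satisfy the `new TaskQueue(cmdConfig.get("concurrency", 2))` call might look like the sketch below; this is an assumption for illustration, not the module actually in the repository:

    // Hypothetical sketch of a concurrency-limited task queue (taskQueue.js not shown in this commit).
    class TaskQueue {
        constructor(concurrency) {
            this.concurrency = concurrency; // maximum number of tasks running at once
            this.running = 0;               // tasks currently in flight
            this.queue = [];                // pending tasks waiting for a free slot
        }
        pushTask(task) {                    // task is a function that receives a "done" callback
            this.queue.push(task);
            this.next();
        }
        next() {
            while (this.running < this.concurrency && this.queue.length) {
                const task = this.queue.shift();
                this.running++;
                task(() => {                // the task signals completion through this callback
                    this.running--;
                    this.next();
                });
            }
        }
    }
    module.exports = TaskQueue;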

spider5/utilities.js

Lines changed: 10 additions & 10 deletions
@@ -3,25 +3,25 @@
 /*jshint esversion: 6*/
 "use strict";

-const urlParse = require('url').parse;
-const urlResolve = require('url').resolve;
-const slug = require('slug');
-const path = require('path');
-const cheerio = require('cheerio');
+const urlParse = require("url").parse;
+const urlResolve = require("url").resolve;
+const slug = require("slug");
+const path = require("path");
+const cheerio = require("cheerio");

 module.exports.urlToFilename = function urlToFilename(url) {
     const parsedUrl = urlParse(url);
-    const urlPath = parsedUrl.path.split('/')
+    const urlPath = parsedUrl.path.split("/")
         .filter(function(component) {
-            return component !== '';
+            return component !== "";
         })
         .map(function(component) {
             return slug(component);
         })
-        .join('/');
+        .join("/");
     let filename = path.join(parsedUrl.hostname, urlPath);
     if(!path.extname(filename).match(/htm/)) {
-        filename += '.html';
+        filename += ".html";
     }
     return filename;
 };
@@ -37,7 +37,7 @@ module.exports.getLinkUrl = function getLinkUrl(currentUrl, element) {
 };

 module.exports.getPageLinks = function getPageLinks(currentUrl, body) {
-    return [].slice.call(cheerio.load(body)('a'))
+    return [].slice.call(cheerio.load(body)("a"))
         .map(function(element) {
             return module.exports.getLinkUrl(currentUrl, element);
         })
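As a quick illustration of urlToFilename above (the URL is a made-up example, not taken from this commit), path components are filtered, slugified, and rejoined, and ".html" is appended when the extension does not already contain "htm":

    // Hypothetical usage example of the function shown in the diff above.
    const utilities = require("./utilities");
    utilities.urlToFilename("http://example.com/some/page");
    // => "example.com/some/page.html" (separator is platform-specific, via path.join)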

spider5/validator.js

Lines changed: 61 additions & 60 deletions
@@ -2,66 +2,67 @@
 /*jshint node: true */
 /*jshint esversion: 6 */
 "use strict";
-const validator = require('validator');
-const cmdConfig = require('./cmdconfig');
-const assert = require('assert');
+const validator = require("validator");
+const cmdConfig = require("./cmdconfig");
+const assert = require("assert");

-module.exports.validate = function()
-{
-    const options = cmdConfig.options;
-    let assertCount = 0;
-    function inc()
-    {
-        assertCount++;
-        return assertCount;
-    }
-    try{
-        assert(options._all.url,'No url specified');
-    }
-    catch(err)
-    {
-        inc();
-        console.error(err.message);
-    }
-    if (options._all.url)
-    {
-        try{
-            assert(validator.isURL(options._all.url,{protocols:['http','https'],require_host: true, require_valid_protocol:true,require_protocols:true}),options._all.url + ' is invalid.');
-        }
-        catch(err)
-        {
-            inc();
-            console.error(err.message);
-        }
-    }
-    if (options._all.concurrency !== undefined)
-        try
-        {
-            assert(validator.isInt(options._all.concurrency.toString(),{min:1}),'Concurrency must be greater than 0');
-        }
-        catch(err)
-        {
-            inc();
-            console.error(err.message);
-        }
+module.exports.validate = function() {
+    const options = cmdConfig.options;
+    let errors = [];
+    errors = validateEmptyURL(options._all.url, errors);
+    errors = validateURLFormat(options._all.url, errors);
+    errors = validateConcurrency(options._all.concurrency, errors);
+    errors = validateNesting(options._all.nesting, errors);
+    return errors;
+};

-    if (options._all.nesting !== undefined)
-    {
-        try {
-            assert(validator.isInt(options._all.nesting.toString(),{gt:0}),'Nesting must be greater than 0');
-        }
-        catch(err)
-        {
-            inc();
-            console.error(err.message);
-        }
-    }
+function validateEmptyURL(url, errors) {
+    try {
+        assert(url, "No url specified");
+    } catch (err) {
+        errors.push(err.message);
+    }
+    return errors;
+}

-    if (assertCount || options._all.help)
-    {
-        console.error(cmdConfig.usage);
-        return false;
-    }
-
-    return true;
-};
+function validateURLFormat(url, errors) {
+    if (url) {
+        try {
+            assert(validator.isURL(url, {
+                protocols: ["http", "https"],
+                require_host: true,
+                require_valid_protocol: true,
+                require_protocols: true
+            }), url + " is invalid.");
+        } catch (err) {
+            errors.push(err.message);
+        }
+    }
+    return errors;
+}
+
+function validateConcurrency(concurrency, errors) {
+    if (concurrency !== undefined) {
+        try {
+            assert(validator.isInt(concurrency.toString(), {
+                min: 1
+            }), "Concurrency must be greater than 0");
+        } catch (err) {
+            errors.push(err.message);
+        }
+    }
+    return errors;
+}
+
+function validateNesting(nesting, errors) {
+    if (nesting !== undefined) {
+        try {
+            assert(validator.isInt(nesting.toString(), {
+                gt: 0
+            }), "Nesting must be greater than 0");
+        } catch (err) {
+            errors.push(err.message);
+        }
+    }
+    return errors;
+}
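The net effect of the refactor above is a change of contract: validate() no longer prints the usage text and returns a boolean, it returns an array of error messages and leaves reporting and exiting to the caller, as the spider.js hunk shows. A small standalone usage example (the command-line values are invented for illustration):

    // Hypothetical example: suppose the process was started with no url and --concurrency 0.
    const validator = require("./validator");
    const errors = validator.validate();
    // errors would then be something like:
    // [ "No url specified", "Concurrency must be greater than 0" ]
    errors.forEach((msg) => console.error(msg));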
