Skip to content

Commit c07e581

Browse files
author
Zakaria RACHEDI
committed
Max items per category can now be specified from the CLI
1 parent 6622f44 commit c07e581

File tree

3 files changed

+44
-18
lines changed

3 files changed

+44
-18
lines changed

lib/app.js

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ var url;
1212
var globalUrl;
1313
var itemCategory;
1414
var maxItem = 1;
15+
var all = true;
1516
var requestOpts = {
1617
url: '',
1718
method: 'GET',
@@ -40,6 +41,7 @@ function crawlerInit (cmdResponse) {
4041
var countdown = new Spinner('Crawler in progress... It could take some time ', ['⣾','⣽','⣻','⢿','⡿','⣟','⣯','⣷']);
4142
countdown.start();
4243
maxItem = cmdResponse.maxItem;
44+
all = cmdResponse.all;
4345
itemCategory = cmdResponse.category;
4446
cmdResponse.language == 'french' ? url = sw.cmdSwitch(itemCategory) : url = sw.cmdSwitchEn(itemCategory);
4547
cmdResponse.game == 'dofus' ? url = url : url = url.replace('https://www.dofus.com', 'https://www.dofus-touch.com');
@@ -52,35 +54,35 @@ function getPageLinks() {
5254
return request(requestOpts).then(function ($) {
5355
var links = [];
5456
$('tbody').find('tr').each(function(i, tr){
55-
if(i >= maxItem) return false;
57+
if(!all) if(i >= maxItem) return false;
5658
var link = globalUrl + $(this).find('td').eq(1).find('a').attr('href');
5759
links.push(link);
5860
});
5961
return links;
6062
}).then(function(links) {
61-
fsPath.writeFile('./data/links/' + itemCategory + '_links.json', JSON.stringify(links), function(err){
62-
if (err) console.log(err);
63-
console.log('\x1b[36m%s\x1b[0m' ,'\n SUCCESS : all item(s) links crawled.');
64-
console.log('\x1b[36m%s\x1b[0m' ,'\n START of item(s) crawling.');
65-
getItems(itemCategory, getLinksFromFile());
66-
});
63+
fsPath.writeFileSync('./data/links/' + itemCategory + '_links.json', JSON.stringify(links));
64+
console.log('\x1b[36m%s\x1b[0m' ,'\n SUCCESS : all item(s) links crawled.');
65+
console.log('\x1b[36m%s\x1b[0m' ,'\n START of item(s) crawling.');
66+
getItems(itemCategory, getLinksFromFile());
6767
}).catch(function(err) {
68-
console.log('\x1b[31m%s\x1b[0m' ,'/!\\Broken promise from getPageLinks');
69-
console.log(err);
70-
process.exit();
68+
console.log('\x1b[31m%s\x1b[0m' ,'/!\\Broken promise from getPageLinks');
69+
if(err.statusCode == '429') console.log('\x1b[31m%s\x1b[0m' ,'\n/!\\Error 429 detected ! You reached maximum request per hour, over pass it will provoke a ban IP from Ankama. Resume the parsing after 1h !'), process.exit();
70+
else if(err.code == 'ETIMEDOUT') console.log('\x1b[31m%s\x1b[0m' ,'\n /!\\ Error ETIMEDOUT detected ! Your connexion took too much time to respond.'), process.exit();
71+
else if(err.message == 'Error: read ECONNRESET') console.log('\x1b[31m%s\x1b[0m' ,'\n /!\\ Error ECONNRESET detected ! Connexion shutdown or reset, verify your internet connexion !'), process.exit();
72+
else if(err.message == 'Error: unable to verify the first certificate') console.log('\x1b[33m%s\x1b[0m' ,'/!\\However, don\'t worry: if your relaunch it, the app will resume the parsing from last item parsed ;)'), process.exit();
73+
else console.log(err), console.log('\x1b[31m%s\x1b[0m' ,'/!\\Broken promise from getPageLinks'), process.exit();
7174
});
7275
}
7376

7477
function getItems(category, links) {
7578
gi.getItems(category, links, function(items){
76-
if(fs.existsSync('./data/links/resume.json')) fs.unlinkSync('./data/links/resume.json');
7779
category = category.replace(/ /g,'');
78-
fsPath.writeFile('./data/' + category + '.json', JSON.stringify(items), function(err){
79-
if (err) console.log(err);
80-
console.log('\x1b[32m%s\x1b[0m' ,'\n SUCCESS : ' +items.length+ ' item(s) were crawled.');
81-
console.log('\x1b[33m%s\x1b[0m' ,'File ' + category +'.json' + ' was generated under "data/" folder.');
82-
process.exit();
83-
});
80+
if(fs.existsSync('./data/links/resume.json')) fs.unlinkSync('./data/links/resume.json');
81+
if(fs.existsSync('./data/links/' + category + '_links.json')) fs.unlinkSync('./data/links/' + category + '_links.json');
82+
fsPath.writeFileSync('./data/' + category + '.json', JSON.stringify(items));
83+
console.log('\x1b[32m%s\x1b[0m' ,'\n SUCCESS : ' +items.length+ ' item(s) were crawled.');
84+
console.log('\x1b[33m%s\x1b[0m' ,'File ' + category +'.json' + ' was generated under "data/" folder.');
85+
process.exit();
8486
});
8587
}
8688

lib/cli-view/cmd.js

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,19 @@ var equipment = [
4242
return val.toLowerCase().replace(/ /g,'');
4343
}
4444
},
45+
{
46+
type: 'confirm',
47+
name: 'all',
48+
message: 'Do you want to parse [ALL/MAX] items of the category ?'
49+
},
4550
{
4651
type: 'input',
4752
name: 'maxItem',
4853
message: 'How many items do you want to parse ?',
4954
default: '1',
55+
when: function(answers) {
56+
return !answers.all;
57+
},
5058
validate: function(value) {
5159
var valid = !isNaN(parseFloat(value));
5260
return valid || 'Please enter a number';
@@ -65,11 +73,19 @@ var weapon = [
6573
return val.toLowerCase().replace(/ /g,'');
6674
}
6775
},
76+
{
77+
type: 'confirm',
78+
name: 'all',
79+
message: 'Do you want to parse [ALL/MAX] items of the category ?'
80+
},
6881
{
6982
type: 'input',
7083
name: 'maxItem',
7184
message: 'How many items do you want to parse ?',
7285
default: '1',
86+
when: function(answers) {
87+
return !answers.all;
88+
},
7389
validate: function(value) {
7490
var valid = !isNaN(parseFloat(value));
7591
return valid || 'Please enter a number';
@@ -79,11 +95,19 @@ var weapon = [
7995
];
8096

8197
var page = [
98+
{
99+
type: 'confirm',
100+
name: 'all',
101+
message: 'Do you want to parse [ALL/MAX] items of the category ?'
102+
},
82103
{
83104
type: 'input',
84105
name: 'maxItem',
85106
message: 'How many items do you want to parse ?',
86107
default: '1',
108+
when: function(answers) {
109+
return !answers.all;
110+
},
87111
validate: function(value) {
88112
var valid = !isNaN(parseFloat(value));
89113
return valid || 'Please enter a number';

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "crawlit",
3-
"version": "1.1.1",
3+
"version": "1.2.1",
44
"description": "Crawl dofus encyclopedia ",
55
"main": "lib/app.js",
66
"bin": "lib/app.js",

0 commit comments

Comments
 (0)