Skip to content

Commit 371668c

Browse files
author
Zakaria RACHEDI
committed
Complete, in-depth restructuring of the parsing method + Resume feature
The parser is now based on the number of items rather than on pages!
1 parent 4286330 commit 371668c

File tree

5 files changed

+173
-176
lines changed

5 files changed

+173
-176
lines changed

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
/dist
2+
/data
3+
/logs
4+
/npm-debug.log
5+
/node_modules
6+
.DS_Store
7+
.vscode

lib/app.js

Lines changed: 37 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ var CLI = require('clui'),
1212
var url;
1313
var globalUrl;
1414
var itemCategory;
15-
var currentPage = 1;
16-
var pageslinks = [];
15+
var maxItem = 1;
1716
var requestOpts = {
1817
url: '',
1918
method: 'GET',
@@ -26,105 +25,75 @@ main();
2625

2726
function main() {
2827
asciiArt();
29-
cm.getCategory().then(
30-
function(cmdResponse) {
31-
crawlerInit(cmdResponse);
32-
}).catch(
33-
function(err) {
34-
console.log('\x1b[31m%s\x1b[0m' ,'/!\\Broken promise from cmd');
35-
console.log(err);
36-
process.exit();
37-
});
28+
var ifResume = fs.existsSync('./data/links/resume.json') ? true : false;
29+
cm.getCategory(ifResume).then(
30+
function(cmdResponse) {
31+
if(cmdResponse == "yes") resumeLastParse();
32+
else crawlerInit(cmdResponse);
33+
}).catch(
34+
function(err) {
35+
console.log('\x1b[31m%s\x1b[0m' ,'/!\\Broken promise from cmd resume');
36+
console.log(err);
37+
process.exit();
38+
});
3839
}
3940

4041
function crawlerInit (cmdResponse) {
4142
cmdResponse = JSON.parse(cmdResponse);
4243
var countdown = new Spinner('Crawler in progress... It could take some time ', ['⣾','⣽','⣻','⢿','⡿','⣟','⣯','⣷']);
4344
countdown.start();
44-
maxPage = cmdResponse.pages;
45-
currentPage = cmdResponse.fromPage;
45+
maxItem = cmdResponse.maxItem;
4646
itemCategory = cmdResponse.category;
4747
cmdResponse.language == 'french' ? url = sw.cmdSwitch(itemCategory) : url = sw.cmdSwitchEn(itemCategory);
4848
cmdResponse.game == 'dofus' ? url = url : url = url.replace('https://www.dofus.com', 'https://www.dofus-touch.com');
4949
globalUrl = url.substring(0, url.indexOf(".com/") + 4);
5050
requestOpts.url = url;
51-
var realMaxPagePromise = request(requestOpts).then(function ($) {
52-
var realMaxPage = $('div.text-center.ak-pagination.hidden-xs').find('ul.ak-pagination.pagination.ak-ajaxloader li:last-child').prev().prev().text().trim();
53-
return realMaxPage;
54-
});
55-
getAllLinks(realMaxPagePromise);
56-
}
57-
58-
function getAllLinks(realMaxPagePromise) {
59-
realMaxPagePromise.then(
60-
function(realMaxPage) {
61-
if (realMaxPage == '') {
62-
realMaxPage = 1;
63-
}
64-
if (realMaxPage >= maxPage && currentPage <= maxPage) {
65-
var callback = function(values) {
66-
pageslinks.push(values);
67-
currentPage++;
68-
if(currentPage <= maxPage) {
69-
getPageLinks(currentPage, callback);
70-
}else {
71-
pageslinks = concatToOneArray(pageslinks);
72-
console.log('\x1b[36m%s\x1b[0m' ,'\n SUCCESS : all item(s) links crawled.');
73-
console.log('\x1b[36m%s\x1b[0m' ,'\n START of item(s) crawling.');
74-
getItems(pageslinks);
75-
}
76-
}
77-
getPageLinks(currentPage, callback);
78-
}else {
79-
console.log('\x1b[31m%s\x1b[0m' ,'\n /!\\ Max page of this category is ' + realMaxPage + ' so '+ maxPage + ' is to much :(');
80-
process.exit();
81-
}
82-
83-
}).catch(
84-
function(err) {
85-
console.log(err);
86-
console.log('\x1b[31m%s\x1b[0m' ,'/!\\Broken promise from getAllLinks');
87-
process.exit();
88-
});
51+
getPageLinks();
8952
}
9053

91-
function getPageLinks(currentPage, callback) {
92-
requestOpts.url = url + 'page=' + currentPage;
54+
function getPageLinks() {
9355
return request(requestOpts).then(function ($) {
9456
var links = [];
9557
$('tbody').find('tr').each(function(i, tr){
58+
if(i >= maxItem) return false;
9659
var link = globalUrl + $(this).find('td').eq(1).find('a').attr('href');
9760
links.push(link);
9861
});
9962
return links;
100-
}).then(function(val) {
101-
callback(val);
63+
}).then(function(links) {
64+
fsPath.writeFile('./data/links/' + itemCategory + '_links.json', JSON.stringify(links), function(err){
65+
if (err) console.log(err);
66+
console.log('\x1b[36m%s\x1b[0m' ,'\n SUCCESS : all item(s) links crawled.');
67+
console.log('\x1b[36m%s\x1b[0m' ,'\n START of item(s) crawling.');
68+
getItems(itemCategory, getLinksFromFile());
69+
});
10270
}).catch(function(err) {
10371
console.log('\x1b[31m%s\x1b[0m' ,'/!\\Broken promise from getPageLinks');
10472
console.log(err);
10573
process.exit();
10674
});
10775
}
10876

109-
function getItems(pageslinks) {
110-
gi.getItems(pageslinks, function(items){
111-
itemCategory = itemCategory.replace(/ /g,'');
112-
fsPath.writeFile('./data/' + itemCategory + '.json', JSON.stringify(items), function(err){
77+
function getItems(category, links) {
78+
gi.getItems(category, links, function(items){
79+
category = category.replace(/ /g,'');
80+
fsPath.writeFile('./data/' + category + '.json', JSON.stringify(items), function(err){
11381
if (err) console.log(err);
114-
console.log('\x1b[32m%s\x1b[0m' ,'\n SUCCESS : ' +pageslinks.length+ ' item(s) were crawled.');
115-
console.log('\x1b[33m%s\x1b[0m' ,'File ' + itemCategory +'.json' + ' was generated under "data/" folder.');
82+
console.log('\x1b[32m%s\x1b[0m' ,'\n SUCCESS : ' +items.length+ ' item(s) were crawled.');
83+
console.log('\x1b[33m%s\x1b[0m' ,'File ' + category +'.json' + ' was generated under "data/" folder.');
11684
process.exit();
11785
});
11886
});
11987
}
12088

121-
function concatToOneArray(arrToConvert) {
122-
var newArr = [];
123-
for(var i = 0; i < arrToConvert.length; i++) {
124-
newArr = newArr.concat(arrToConvert[i]);
125-
}
126-
const noDuplicateItemArray = newArr.filter((val,id,array) => array.indexOf(val) == id);
127-
return noDuplicateItemArray;
89+
function getLinksFromFile() {
90+
var links = JSON.parse(fs.readFileSync('./data/links/' + itemCategory + '_links.json', 'utf8'));
91+
return links;
92+
}
93+
94+
function resumeLastParse() {
95+
var resume = JSON.parse(fs.readFileSync('./data/links/resume.json', 'utf8'));
96+
getItems(resume.category, resume.links);
12897
}
12998

13099
function asciiArt() {

lib/cli-view/cmd.js

Lines changed: 33 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
var inquirer = require('inquirer');
2+
var fs = require('fs');
23
var language = 'french';
34
var game = 'dofus';
45
var firstCategory = [
@@ -43,19 +44,8 @@ var equipment = [
4344
},
4445
{
4546
type: 'input',
46-
name: 'fromPage',
47-
message: 'From which page do you want to start ?',
48-
default: '1',
49-
validate: function(value) {
50-
var valid = !isNaN(parseFloat(value));
51-
return valid || 'Please enter a number';
52-
},
53-
filter: Number
54-
},
55-
{
56-
type: 'input',
57-
name: 'pages',
58-
message: 'To which page do you want to stop ?',
47+
name: 'maxItem',
48+
message: 'How many items do you want to parse ?',
5949
default: '1',
6050
validate: function(value) {
6151
var valid = !isNaN(parseFloat(value));
@@ -77,19 +67,8 @@ var weapon = [
7767
},
7868
{
7969
type: 'input',
80-
name: 'fromPage',
81-
message: 'From which page do you want to start ?',
82-
default: '1',
83-
validate: function(value) {
84-
var valid = !isNaN(parseFloat(value));
85-
return valid || 'Please enter a number';
86-
},
87-
filter: Number
88-
},
89-
{
90-
type: 'input',
91-
name: 'pages',
92-
message: 'To which page do you want to stop ?',
70+
name: 'maxItem',
71+
message: 'How many items do you want to parse ?',
9372
default: '1',
9473
validate: function(value) {
9574
var valid = !isNaN(parseFloat(value));
@@ -102,34 +81,44 @@ var weapon = [
10281
var page = [
10382
{
10483
type: 'input',
105-
name: 'fromPage',
106-
message: 'From which page do you want to start ?',
84+
name: 'maxItem',
85+
message: 'How many items do you want to parse ?',
10786
default: '1',
10887
validate: function(value) {
10988
var valid = !isNaN(parseFloat(value));
11089
return valid || 'Please enter a number';
11190
},
11291
filter: Number
113-
},
92+
}
93+
];
94+
95+
var resume = [
11496
{
115-
type: 'input',
116-
name: 'pages',
117-
message: 'To which page do you want to stop ?',
118-
default: '1',
119-
validate: function(value) {
120-
var valid = !isNaN(parseFloat(value));
121-
return valid || 'Please enter a number';
122-
},
123-
filter: Number
97+
type: 'list',
98+
name: 'resume',
99+
message: 'Do you want to resume your last parse ?',
100+
choices: ['Yes', 'No'],
101+
filter: function(val) {
102+
return val.toLowerCase();
103+
}
124104
}
125105
];
126106

127-
var getCategory = exports.getCategory = function() {
128-
return inquirer.prompt(firstCategory).then(answers => {
129-
language = answers.language;
130-
game = answers.game;
131-
return switchCategory(answers.category);
132-
});
107+
var getCategory = exports.getCategory = function(ifResume) {
108+
if(ifResume) {
109+
return inquirer.prompt(resume).then(answers => {
110+
if(answers.resume == 'yes') return answers.resume;
111+
else fs.unlinkSync('./data/links/resume.json')
112+
return getCategory(false);
113+
});
114+
}
115+
else{
116+
return inquirer.prompt(firstCategory).then(answers => {
117+
language = answers.language;
118+
game = answers.game;
119+
return switchCategory(answers.category);
120+
});
121+
}
133122
}
134123

135124
function switchCategory(category) {

0 commit comments

Comments
 (0)