Skip to content

Commit bc7ff14

Browse files
author
Zakaria RACHEDI
committed
New parsing method issues fixed
1 parent 371668c commit bc7ff14

File tree

2 files changed

+12
-17
lines changed

2 files changed

+12
-17
lines changed

lib/app.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ function getPageLinks() {
7676

7777
function getItems(category, links) {
7878
gi.getItems(category, links, function(items){
79+
if(fs.existsSync('./data/links/resume.json')) fs.unlinkSync('./data/links/resume.json');
7980
category = category.replace(/ /g,'');
8081
fsPath.writeFile('./data/' + category + '.json', JSON.stringify(items), function(err){
8182
if (err) console.log(err);

lib/getItems.js

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ var CLI = require('clui'),
1515
var currentPosition = 0;
1616
var progressbarPosition = 0;
1717
var itemsList = [];
18-
var fullList;
19-
const parserLimit = 700;
18+
var gCategory;
19+
var gLinks;
2020
var thisProgressBar = new Progress(20);
2121
var options = {
2222
method: 'POST',
@@ -27,15 +27,15 @@ var options = {
2727
};
2828

2929
var getItems = exports.getItems = function(category, links, back) {
30+
gCategory = category;
31+
gLinks = links;
3032
if (currentPosition == 0) {
3133
if(fs.existsSync('./data/links/resume.json')) {
3234
var itemsResumed = JSON.parse(fs.readFileSync('./data/'+category+'.json', 'utf8'));
3335
itemsList = itemsResumed;
3436
}
35-
// console.log('initiale itemsList: '+itemsList.length);
36-
// console.log('initiale links: '+links.length);
3737
}
38-
fullList = itemsList.length + links.length;
38+
// if(currentPosition == 10) limitCrossed(gCategory, currentPosition, gLinks, itemsList);
3939
getData(links[currentPosition],back, function(item){
4040
itemsList.push(item);
4141
if (progressbarPosition >= links.length * 0.05) {
@@ -44,14 +44,8 @@ var getItems = exports.getItems = function(category, links, back) {
4444
}
4545
progressbarPosition++;
4646
currentPosition++;
47-
if(currentPosition == parserLimit) {
48-
// console.log('itemList: '+itemsList.length+' links: '+fullList);
49-
if(itemsList.length !== fullList) limitCrossed(category, currentPosition, links, itemsList);
50-
}else if(currentPosition < links.length) getItems(category, links, back); // any more items in array?
51-
else {
52-
console.log(thisProgressBar.update(100, 100));
53-
back(itemsList);
54-
}
47+
if(currentPosition < links.length) getItems(category, links, back); // any more items in array?
48+
else console.log(thisProgressBar.update(100, 100)), back(itemsList);
5549
});
5650
};
5751

@@ -111,8 +105,8 @@ function getData(url, back, callback) {
111105
console.log('\x1b[33m%s\x1b[0m' ,'\n Error 404 detected ! Maybe empty item (Encyclopedia error).');
112106
callback();
113107
}else if(err.statusCode == '429') {
114-
console.log('\x1b[31m%s\x1b[0m' ,'\n /!\\ Error 429 detected ! Too many request, be careful Ankama can ban your IP. /!\\ Never parse more than 20 pages/hour');
115-
process.exit();
108+
console.log('\x1b[31m%s\x1b[0m' ,'\n /!\\ Error 429 detected ! Too many request, be careful Ankama can ban your IP. /!\\ Never parse more than 700 items/hour');
109+
limitCrossed(gCategory, currentPosition, gLinks, itemsList);
116110
}else {
117111
console.log(err);
118112
console.log('\x1b[31m%s\x1b[0m' ,'/!\\Broken promise all');
@@ -123,15 +117,15 @@ function getData(url, back, callback) {
123117

124118
function limitCrossed(category, lastIndex, links, items) {
125119
if(fs.existsSync('./data/links/resume.json')) fs.unlinkSync('./data/links/resume.json');
126-
links = links.slice(lastIndex, 10);
120+
links = links.slice(lastIndex);
127121
var resume = {'category': category, links: links};
128122

129123
category = category.replace(/ /g,'');
130124
fsPath.writeFile('./data/' + category + '.json', JSON.stringify(items), function(err){
131125
if (err) console.log(err);
132126
fsPath.writeFile('./data/links/resume.json', JSON.stringify(resume), function(err){
133127
if (err) console.log(err);
134-
console.log('\x1b[31m%s\x1b[0m' ,'\n/!\\You reached maximum request per hour ('+parserLimit+'), over pass it will provoke a ban IP from Ankama. Resume the parsing after 1h !');
128+
console.log('\x1b[31m%s\x1b[0m' ,'\n/!\\You reached maximum request per hour, over pass it will provoke a ban IP from Ankama. Resume the parsing after 1h !');
135129
console.log('\x1b[33m%s\x1b[0m' ,'\n INFO : ' +items.length+ ' item(s) were crawled.');
136130
console.log('\x1b[33m%s\x1b[0m' ,'/!\\Don\'t worry, the app will resume the parsing from last item parsed ;)');
137131
setTimeout(process.exit(),4000);

0 commit comments

Comments
 (0)