Skip to content

Commit 31ab794

Browse files
author
Zakaria RACHEDI
committed
Updated the new parsing approach + fixed parsing of conditions containing "OR"
1 parent 2309b6b commit 31ab794

File tree

4 files changed

+183
-165
lines changed

4 files changed

+183
-165
lines changed

lib/item-crawler/getEquipments.js

Lines changed: 28 additions & 90 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
var request = require('request-promise');
22
var cheerio = require('cheerio');
33
var { getId, getElement, getDate, sanatizer } = require('./helpers');
4+
var { effectParse, recipeParse, descriptionParse } = require('./parsing-service/helpers');
45

56
var requestOpts = {
67
url: '',
@@ -9,14 +10,16 @@ var requestOpts = {
910
}
1011
};
1112
var item = {};
13+
var body = '';
1214

1315
var getEquipments = exports.getEquipments = function (url) {
1416
requestOpts.url = url;
1517
return request(requestOpts).then(function ($) {
18+
/////// Global initializations ///////
19+
body = $.html();
1620

1721
/////// Description parse ///////
18-
var body = $.html();
19-
descriptionParse(body, url);
22+
item = descriptionParse(body, url);
2023

2124
/////// Tabs initializations ///////
2225
item["stats"] = [];
@@ -31,110 +34,45 @@ var getEquipments = exports.getEquipments = function (url) {
3134
}
3235

3336
/////// Effects & conditions parse ///////
34-
if(typeof $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-container.ak-panel').eq(1) !== 'undefined') {
35-
$('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-container.ak-panel').eq(1).find('div.col-sm-6').each(function(i, element){
37+
var $akContainer = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-container.ak-panel');
38+
if(typeof $akContainer.eq(1) !== 'undefined') {
39+
$akContainer.eq(1).find('div.col-sm-6').each(function(i, element){
3640
var infoCategory = $(this).find('div.ak-panel-title').text().trim().toLowerCase();
37-
console.log(infoCategory);
38-
switch (infoCategory) {
39-
case 'effets':
40-
effectParse($(this).html());
41-
break;
42-
case 'effects':
43-
effectParse($(this).html());
44-
break;
45-
case 'conditions':
46-
conditionParse($(this).html());
47-
break;
48-
console.log('Sorry, we are out of ' + infoCategory + '.');
49-
}
41+
categorySwitch(infoCategory, $(this).html());
5042
});
5143
}
5244

5345
/////// Recipes parse ///////
5446
if(typeof $('div.ak-container.ak-panel.ak-crafts') !== 'undefined') {
55-
var body = $.html();
56-
recipeParse(body);
47+
item["recipe"] = recipeParse(body);
5748
}
5849
return item;
5950
});
6051
}
6152

62-
function effectParse(body) {
63-
var $ = cheerio.load(body);
64-
$('div.ak-list-element').each(function(i, element){
65-
var stat = $(this).find( "div.ak-title" ).text().trim();
66-
var statToTest = stat.toLowerCase().normalize('NFD').replace(/[\u0300-\u036f]/g, "");
67-
if (statToTest.includes('title') || statToTest.includes('titre') || statToTest.includes('attitude') || statToTest.includes('emote') ||
68-
statToTest.includes('echangeable') || statToTest.includes('exchangeable') || statToTest.includes('lie au') || statToTest.includes('linked to')) {
69-
if (statToTest.includes('title') || statToTest.includes('titre')) item["stats"].push({'title': stat.split(':')[1].trim()});
70-
else if (statToTest.includes('attitude') || statToTest.includes('emote')) item["stats"].push({'emote': stat});
71-
else if (statToTest.includes('echangeable') || statToTest.includes('exchangeable')) item["stats"].push({'exchangeable': getDate(stat)});
72-
else if (statToTest.includes('lie au') || statToTest.includes('linked to')) item["stats"].push({'linked': true});
73-
}else {
74-
var stat = $(this).find( "div.ak-title" ).text().trim();
75-
var element = getElement(stat);
76-
element = element.charAt(0).toUpperCase() + element.slice(1);
77-
var numbers = [];
78-
stat.replace(/(-?\d[\d\.]*)/g, function( x ) {
79-
var n = Number(x); if (x == n) { numbers.push(x); }
80-
});
81-
if(typeof numbers[1] == 'undefined') var groupeElement = {[element]: numbers[0]};
82-
else var groupeElement = {[element]: {'from': numbers[0], 'to': numbers[1]}};
83-
item["stats"].push(groupeElement);
84-
}
85-
});
86-
}
87-
8853
function conditionParse(body) {
8954
var $ = cheerio.load(body);
90-
var condition = $('div.ak-container.ak-panel.no-padding').find('div.ak-list-element').find( "div.ak-title" ).remove("br").text().trim();
91-
var conditionTab = condition.split('et\n');
92-
/*console.log(conditionTab);
93-
console.log(conditionTab.map(function(string) {
94-
return sanatizer(string);
95-
}));*/
96-
item["condition"] = conditionTab;
97-
}
98-
99-
function recipeParse(body) {
100-
var $ = cheerio.load(body);
101-
$('div.ak-container.ak-panel.ak-crafts').find('div.ak-panel-content').find('div.ak-container.ak-content-list').find('div.ak-column').each(function(i, element){
102-
var setUrl = 'https://www.dofus-touch.com' + $(this).find('div.ak-title').find('a').attr('href');
103-
var setId = $(this).find('div.ak-title').find('a').attr('href').replace(/\D/g,'');
104-
var setImage = $(this).find('div.ak-image').find('a').find('span.ak-linker').find('img').attr('src').replace('dofus/ng/img/../../../', '');
105-
var setQuantity = $(this).find('div.ak-front').text().replace(/\x/g,'').trim();
106-
var setName = $(this).find('div.ak-content').find('div.ak-title').find('a').find('span.ak-linker').text().trim();
107-
var setType = $(this).find('div.ak-content').find('div.ak-text').text().trim();
108-
var setLvl = $(this).find('div.ak-aside').text().replace(/\D/g,'').trim();
109-
110-
var groupeElement = {[setName]: {
111-
'id': setId,
112-
'url': setUrl,
113-
'imgUrl': setImage,
114-
'type': setType,
115-
'lvl': setLvl,
116-
'quantity': setQuantity
117-
}};
118-
item["recipe"].push(groupeElement);
55+
var condition = $('div.ak-container.ak-panel.no-padding').find('div.ak-list-element').find( "div.ak-title" ).remove("br").text().trim().sanatizer();
56+
condition = sanatizer(condition);
57+
var conditionTab = condition.split('et');
58+
conditionTab = conditionTab.map(function(string) {
59+
return sanatizer(string).trim();
11960
});
61+
item["condition"] = conditionTab;
12062
}
12163

122-
function descriptionParse(body, url) {
64+
function categorySwitch(infoCategory, body) {
12365
var $ = cheerio.load(body);
124-
var itemId = getId(url);
125-
var type = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-encyclo-detail-type.col-xs-6').find('span').text().trim();
126-
var name = $('h1.ak-return-link').text().trim();
127-
var description = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-container.ak-panel').first().find('div.ak-panel-content').text().trim();
128-
var lvl = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-encyclo-detail-level.col-xs-6.text-right').text().trim().replace(/\D/g,'');
129-
var imgUrl = $('div.ak-encyclo-detail-illu').find('img').attr('src').replace('dofus/ng/img/../../../', '');
130-
131-
item = {
132-
id: itemId,
133-
name: name,
134-
description: description,
135-
lvl: lvl,
136-
type: type,
137-
imgUrl: imgUrl,
138-
url: url
66+
switch (infoCategory) {
67+
case 'effets':
68+
item["stats"] = effectParse(body);
69+
break;
70+
case 'effects':
71+
item["stats"] = effectParse(body);
72+
break;
73+
case 'conditions':
74+
conditionParse(body);
75+
break;
76+
console.log('Sorry, we are out of ' + infoCategory + '.');
13977
}
14078
}

lib/item-crawler/getWeapons.js

Lines changed: 76 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -1,100 +1,102 @@
11
var request = require('request-promise');
22
var cheerio = require('cheerio');
3-
var { getId, getElement } = require('./helpers');
3+
var { getId, getElement, getDate, sanatizer } = require('./helpers');
4+
var { effectParse, recipeParse, descriptionParse } = require('./parsing-service/helpers');
45

56
var requestOpts = {
67
url: '',
78
transform: function (body) {
89
return cheerio.load(body);
910
}
1011
};
12+
var item = {};
13+
var body = '';
1114

1215
var getWeapons = exports.getWeapons = function (url) {
1316
requestOpts.url = url;
1417
return request(requestOpts).then(function ($) {
15-
var itemId = getId(url);
16-
var type = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-encyclo-detail-type.col-xs-6').find('span').text().trim();
17-
var name = $('h1.ak-return-link').text().trim();
18-
var description = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-container.ak-panel').first().find('div.ak-panel-content').text().trim();
19-
var lvl = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-encyclo-detail-level.col-xs-6.text-right').text().trim().replace(/\D/g,'');
20-
var imgUrl = $('div.ak-encyclo-detail-illu').find('img').attr('src').replace('dofus/ng/img/../../../', '');
21-
22-
var item = {
23-
id: itemId,
24-
name: name,
25-
description: description,
26-
lvl: lvl,
27-
type: type,
28-
imgUrl: imgUrl,
29-
url: url
30-
}
31-
18+
/////// Global initializations ///////
19+
body = $.html();
20+
21+
/////// Description parse ///////
22+
item = descriptionParse(body, url);
23+
24+
/////// Tabs initializations ///////
3225
item["stats"] = [];
26+
item["characteristic"] = [];
3327
item["condition"] = [];
34-
item["recipe"] = [];
28+
item["recipe"] = [];
29+
item["set"] = [];
30+
31+
/////// Sets parse ///////
3532
if(typeof $('div.ak-container.ak-panel.ak-crafts').next('div.ak-container.ak-panel').find('div.ak-panel-title').find('a').attr('href') !== 'undefined') {
36-
var setUrl = 'https://www.dofus-touch.com' + $('div.ak-container.ak-panel.ak-crafts').next('div.ak-container.ak-panel').find('div.ak-panel-title').find('a').attr('href');
3733
var setId = $('div.ak-container.ak-panel.ak-crafts').next('div.ak-container.ak-panel').find('div.ak-panel-title').find('a').attr('href').replace(/\D/g,'');
38-
var setName = $('div.ak-container.ak-panel.ak-crafts').next('div.ak-container.ak-panel').find('div.ak-panel-title').find('a').text();
39-
item.set = {
40-
id: setId,
41-
url: setUrl,
42-
name: setName
43-
}
44-
}else {
45-
item["set"] = [];
34+
item.set = setId;
4635
}
47-
item["characteristic"] = [];
48-
$('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.col-sm-6').eq(1).find('div.ak-container.ak-panel.no-padding').eq(0).find('div.ak-list-element').each(function(i, element){
49-
var spanTxt = $(this).find( "div.ak-title" ).find("span").text();
50-
$(this).find( "div.ak-title" ).find("span").remove();
51-
var characteristic = $(this).find( "div.ak-title" ).text().trim() + ' ' + spanTxt;
52-
var element = characteristic.substring(0, characteristic.indexOf(":")).trim();
53-
var subElement = characteristic.substring(characteristic.indexOf(":")+1, characteristic.length).trim();
54-
var groupeElement = {[element]: subElement};
55-
item["characteristic"].push(groupeElement);
56-
});
5736

58-
$('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.col-sm-6').eq(0).find('div.ak-list-element').each(function(i, element){
59-
var stat = $(this).find( "div.ak-title" ).text().trim();
60-
var element = getElement(stat);
61-
element = element.charAt(0).toUpperCase() + element.slice(1);
62-
var numbers = [];
63-
stat.replace(/(-?\d[\d\.]*)/g, function( x ) {
64-
var n = Number(x); if (x == n) { numbers.push(x); }
37+
/////// Effects & Conditions & Characteristics parse ///////
38+
var $akContainer = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-container.ak-panel');
39+
if(typeof $akContainer.eq(1) !== 'undefined') {
40+
$akContainer.eq(1).find('div.col-sm-6').each(function(i, element){
41+
if($(this).find('div.ak-container.ak-panel').eq(1).html() !== null) {
42+
categorySwitch($(this).find('div.ak-container.ak-panel').eq(1).find('div.ak-panel-title').text().trim().toLowerCase(), $(this).find('div.ak-container.ak-panel').eq(1).html());
43+
}
44+
var infoCategory = $(this).find('div.ak-container.ak-panel').eq(0).find('div.ak-panel-title').text().trim().toLowerCase().normalize('NFD').replace(/[\u0300-\u036f]/g, "");
45+
categorySwitch(infoCategory, $(this).html());
6546
});
66-
var groupeElement = {[element]: {'from': numbers[0], 'to': numbers[1]}};
67-
item["stats"].push(groupeElement);
68-
});
69-
70-
$('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.col-sm-6').eq(1).find('div.ak-container.ak-panel.no-padding').eq(1).find('div.ak-list-element').each(function(i, element){
71-
var condition = $(this).find( "div.ak-title" ).remove("br").text().trim();
72-
var conditionTab = condition.split('et\n');
73-
item["condition"] = conditionTab;
74-
});
47+
}
7548

49+
/////// Recipes parse ///////
7650
if(typeof $('div.ak-container.ak-panel.ak-crafts') !== 'undefined') {
77-
$('div.ak-container.ak-panel.ak-crafts').find('div.ak-panel-content').find('div.ak-container.ak-content-list').find('div.ak-column').each(function(i, element){
78-
var setUrl = 'https://www.dofus-touch.com' + $(this).find('div.ak-title').find('a').attr('href');
79-
var setId = $(this).find('div.ak-title').find('a').attr('href').replace(/\D/g,'');
80-
var setImage = $(this).find('div.ak-image').find('a').find('span.ak-linker').find('img').attr('src').replace('dofus/ng/img/../../../', '');
81-
var setQuantity = $(this).find('div.ak-front').text().replace(/\x/g,'').trim();
82-
var setName = $(this).find('div.ak-content').find('div.ak-title').find('a').find('span.ak-linker').text().trim();
83-
var setType = $(this).find('div.ak-content').find('div.ak-text').text().trim();
84-
var setLvl = $(this).find('div.ak-aside').text().replace(/\D/g,'').trim();
85-
86-
var groupeElement = {[setName]: {
87-
'id': setId,
88-
'name': setName,
89-
'url': setUrl,
90-
'imgUrl': setImage,
91-
'type': setType,
92-
'lvl': setLvl,
93-
'quantity': setQuantity
94-
}};
95-
item["recipe"].push(groupeElement);
96-
});
51+
item["recipe"] = recipeParse(body);;
9752
}
53+
9854
return item;
9955
});
56+
}
57+
58+
function conditionParse(body) {
59+
var $ = cheerio.load(body);
60+
var condition = $('div.ak-panel-content').find('div.ak-list-element').find( "div.ak-title" ).remove("br").text().trim();
61+
condition = sanatizer(condition);
62+
var conditionTab = condition.split('et');
63+
conditionTab = conditionTab.map(function(string) {
64+
return sanatizer(string).trim();
65+
});
66+
item["condition"] = conditionTab;
67+
}
68+
69+
function characteristicParse(body) {
70+
var $ = cheerio.load(body);
71+
$('div.ak-container.ak-panel.no-padding').eq(0).find('div.ak-list-element').each(function(i, element){
72+
var spanTxt = $(this).find( "div.ak-title" ).find("span").text();
73+
$(this).find( "div.ak-title" ).find("span").remove();
74+
var characteristic = $(this).find( "div.ak-title" ).text().trim() + ' ' + spanTxt;
75+
var element = characteristic.substring(0, characteristic.indexOf(":")).trim();
76+
var subElement = characteristic.substring(characteristic.indexOf(":")+1, characteristic.length).trim();
77+
var groupeElement = {[element]: subElement};
78+
item["characteristic"].push(groupeElement);
79+
});
80+
}
81+
82+
function categorySwitch(infoCategory, body) {
83+
var $ = cheerio.load(body);
84+
switch (infoCategory) {
85+
case 'effets':
86+
item["stats"] = effectParse(body);
87+
break;
88+
case 'effects':
89+
item["stats"] = effectParse(body);
90+
break;
91+
case 'characteristics':
92+
characteristicParse(body);
93+
break;
94+
case 'caracteristiques':
95+
characteristicParse(body);
96+
break;
97+
case 'conditions':
98+
conditionParse(body);
99+
break;
100+
console.log('Sorry, we are out of ' + infoCategory + '.');
101+
}
100102
}

lib/item-crawler/helpers.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ module.exports = {
1414
},
1515

1616
sanatizer: (string) => {
17-
return string.replace(/\\n/g, '').trim();
17+
return string.replace(/(\\n)|(\(|\))/g, '').replace(/(\r\n|\n|\r)/gm," ").replace(/\s\s+/g, ' ').trim();
1818
}
1919

2020
};

0 commit comments

Comments
 (0)