Skip to content

Commit af90fa2

Browse files
author
Zakaria RACHEDI
committed
New parsing algo added to all items + mounts bug correction
1 parent d0bdf16 commit af90fa2

File tree

8 files changed

+177
-180
lines changed

8 files changed

+177
-180
lines changed

README.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,8 @@ An item result example
4848
"stats":[ "many statistics line", "stat 2", ["..."], "stat n" ],
4949
"condition":[ "many conditions line", "condition 2", ["..."], "condition n" ],
5050
"set":{
51-
"id":"set ID in order to link it with items (relation : one to many)",
52-
"url":"set url",
53-
"name":"set name"
51+
"equipments":[itemId, itemId ...],
52+
"weapons":[itemId, itemId ...],
5453
}
5554
}
5655
```

lib/getItems.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ function getData(url, back, callback) {
6363
case /\b(sets|panoplies)\b/gi.test(url):
6464
categoryPromise = gs.getSets(url);
6565
break;
66-
case /\b(mounts|monture)\b/gi.test(url):
66+
case /\b(mounts|montures)\b/gi.test(url):
6767
categoryPromise = gm.getMounts(url);
6868
break;
6969
case /\b(pets|familiers)\b/gi.test(url):

lib/item-crawler/getConsumables.js

Lines changed: 45 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,71 @@
11
var request = require('request-promise');
22
var cheerio = require('cheerio');
3+
var { getId, getElement, getDate, sanatizer } = require('./helpers');
4+
var { effectParse, recipeParse, descriptionParse } = require('./parsing-service/helpers');
35

46
var requestOpts = {
57
url: '',
68
transform: function (body) {
79
return cheerio.load(body);
810
}
911
};
12+
var item = {};
13+
var body = '';
1014

1115
var getConsumables = exports.getConsumables = function (url) {
1216
requestOpts.url = url;
1317
return request(requestOpts).then(function ($) {
14-
var itemId = url.replace(/\D/g,'');
15-
var type = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-encyclo-detail-type.col-xs-6').find('span').text().trim();
16-
var name = $('h1.ak-return-link').text().trim();
17-
var description = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-container.ak-panel').first().find('div.ak-panel-content').text().trim();
18-
var lvl = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-encyclo-detail-level.col-xs-6.text-right').text().trim().replace(/\D/g,'');
19-
var imgUrl = $('div.ak-encyclo-detail-illu').find('img').attr('src').replace('dofus/ng/img/../../../', '');
18+
/////// Global initializations ///////
19+
body = $.html();
2020

21-
var item = {
22-
item_identifiant: itemId,
23-
name: name,
24-
description: description,
25-
lvl: lvl,
26-
type: type,
27-
imgUrl: imgUrl,
28-
url: url
29-
}
21+
/////// Description parse ///////
22+
item = descriptionParse(body, url);
3023

31-
item["effect"] = [];
24+
/////// Tabs initializations ///////
25+
item["stats"] = [];
3226
item["condition"] = [];
3327
item["recipe"] = [];
3428

35-
if ($('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.col-sm-6').eq(0).find('div.ak-container.ak-panel').eq(0).text() !== '') {
36-
$('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.col-sm-6').eq(0).find('div.ak-container.ak-panel').eq(0).find('div.ak-list-element').each(function(i, element){
37-
var stat = $(this).find( "div.ak-title" ).text().trim();
38-
var element = stat.replace(/de|à|[()]|\+|\-|to|and|(-?\d[\d\.]*)/gi, '').trim();
39-
element = element.charAt(0).toUpperCase() + element.slice(1);
40-
var numbers = [];
41-
stat.replace(/(-?\d[\d\.]*)/g, function( x ) {
42-
var n = Number(x); if (x == n) { numbers.push(x); }
43-
});
44-
var groupeElement = {[element]: numbers[0]};
45-
item["effect"].push(groupeElement);
46-
47-
});
48-
}
49-
50-
if ($('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.col-sm-6').eq(1).find('div.ak-container.ak-panel.no-padding').eq(0).text() !== '') {
51-
$('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.col-sm-6').eq(1).find('div.ak-container.ak-panel.no-padding').eq(0).find('div.ak-list-element').each(function(i, element){
52-
var condition = $(this).find( "div.ak-title" ).remove("br").text().trim();
53-
var conditionTab = condition.split('et\n');
54-
item["condition"] = conditionTab;
29+
/////// Effects & conditions parse ///////
30+
var $akContainer = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-container.ak-panel');
31+
if(typeof $akContainer.eq(1) !== 'undefined') {
32+
$akContainer.eq(1).find('div.col-sm-6').each(function(i, element){
33+
var infoCategory = $(this).find('div.ak-panel-title').text().trim().toLowerCase();
34+
categorySwitch(infoCategory, $(this).html());
5535
});
5636
}
5737

38+
/////// Recipes parse ///////
5839
if(typeof $('div.ak-container.ak-panel.ak-crafts') !== 'undefined') {
59-
$('div.ak-container.ak-panel.ak-crafts').find('div.ak-panel-content').find('div.ak-container.ak-content-list').find('div.ak-column').each(function(i, element){
60-
var setUrl = 'https://www.dofus-touch.com' + $(this).find('div.ak-title').find('a').attr('href');
61-
var setId = $(this).find('div.ak-title').find('a').attr('href').replace(/\D/g,'');
62-
var setImage = $(this).find('div.ak-image').find('a').find('span.ak-linker').find('img').attr('src').replace('dofus/ng/img/../../../', '');
63-
var setQuantity = $(this).find('div.ak-front').text().replace(/\x/g,'').trim();
64-
var setName = $(this).find('div.ak-content').find('div.ak-title').find('a').find('span.ak-linker').text().trim();
65-
var setType = $(this).find('div.ak-content').find('div.ak-text').text().trim();
66-
var setLvl = $(this).find('div.ak-aside').text().replace(/\D/g,'').trim();
67-
68-
var groupeElement = {[setName]: {
69-
'id': setId,
70-
'name': setName,
71-
'url': setUrl,
72-
'imgUrl': setImage,
73-
'type': setType,
74-
'lvl': setLvl,
75-
'quantity': setQuantity
76-
}};
77-
item["recipe"].push(groupeElement);
78-
});
40+
item["recipe"] = recipeParse(body);;
7941
}
80-
8142
return item;
8243
});
44+
}
45+
46+
/**
 * Extracts the conditions of an item from a panel's HTML and stores them on
 * the module-level `item` as `item["condition"]`.
 *
 * The text of every `div.ak-title` found under `div.ak-list-element` inside
 * `div.ak-container.ak-panel.no-padding` is collected, passed through
 * `sanatizer`, split on the literal substring 'et', and each piece is passed
 * through `sanatizer` again and trimmed.
 *
 * @param {string} body - HTML markup of the conditions panel.
 * @returns {undefined} The result is written to `item["condition"]`.
 */
function conditionParse(body) {
    const $ = cheerio.load(body);
    const $titles = $('div.ak-container.ak-panel.no-padding')
        .find('div.ak-list-element')
        .find('div.ak-title');

    // NOTE(review): presumably meant to drop <br> tags before reading the
    // text; confirm against cheerio's `.remove(selector)` semantics.
    const rawText = $titles.remove('br').text().trim();

    // NOTE(review): 'et' is matched as a plain substring, so a word that
    // merely contains "et" would also be split; confirm what `sanatizer`
    // returns before tightening this.
    const pieces = sanatizer(rawText).split('et');

    item["condition"] = pieces.map((piece) => sanatizer(piece).trim());
}
56+
57+
/**
 * Dispatches one info panel of a consumable page to the parser that matches
 * the panel's title.
 *
 * Results are written to the module-level `item`: an effects panel sets
 * `item["stats"]` to the return value of `effectParse`, and a conditions
 * panel is handed to `conditionParse`. Any other title is reported on the
 * console.
 *
 * @param {string} infoCategory - Panel title; the caller passes it trimmed
 *   and lower-cased.
 * @param {string} body - HTML markup of the panel.
 * @returns {undefined}
 */
function categorySwitch(infoCategory, body) {
    switch (infoCategory) {
        // French and English page titles share the same parser.
        case 'effets':
        case 'effects':
            item["stats"] = effectParse(body);
            break;
        case 'conditions':
            conditionParse(body);
            break;
        default:
            // This statement used to sit after the last `break` without a
            // `default:` label, which made it unreachable.
            console.log('Sorry, we are out of ' + infoCategory + '.');
    }
}

lib/item-crawler/getEquipments.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ var getEquipments = exports.getEquipments = function (url) {
3737
var $akContainer = $('div.ak-encyclo-detail-right.ak-nocontentpadding').find('div.ak-container.ak-panel');
3838
if(typeof $akContainer.eq(1) !== 'undefined') {
3939
$akContainer.eq(1).find('div.col-sm-6').each(function(i, element){
40-
var infoCategory = $(this).find('div.ak-panel-title').text().trim().toLowerCase();
40+
var infoCategory = $(this).find('div.ak-panel-title').text().trim().toLowerCase().normalize('NFD').replace(/[\u0300-\u036f]/g, "");
4141
categorySwitch(infoCategory, $(this).html());
4242
});
4343
}
@@ -52,7 +52,7 @@ var getEquipments = exports.getEquipments = function (url) {
5252

5353
function conditionParse(body) {
5454
var $ = cheerio.load(body);
55-
var condition = $('div.ak-container.ak-panel.no-padding').find('div.ak-list-element').find( "div.ak-title" ).remove("br").text().trim().sanatizer();
55+
var condition = $('div.ak-container.ak-panel.no-padding').find('div.ak-list-element').find( "div.ak-title" ).remove("br").text().trim();
5656
condition = sanatizer(condition);
5757
var conditionTab = condition.split('et');
5858
conditionTab = conditionTab.map(function(string) {

lib/item-crawler/getMounts.js

Lines changed: 52 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,71 @@
11
var request = require('request-promise');
22
var cheerio = require('cheerio');
3-
var { getId, getElement } = require('./helpers');
3+
var { getId, getElement, getDate, sanatizer } = require('./helpers');
4+
var { effectParse, recipeParse, descriptionParse } = require('./parsing-service/helpers');
45

56
var requestOpts = {
67
url: '',
78
transform: function (body) {
89
return cheerio.load(body);
910
}
1011
};
12+
var item = {};
13+
var body = '';
1114

1215
var getMounts = exports.getMounts = function (url) {
1316
requestOpts.url = url;
1417
return request(requestOpts).then(function ($) {
15-
var itemId = getId(url);
16-
var type = $('div.ak-encyclo-detail').find('div.ak-encyclo-detail-type.col-xs-6').find('span').text().trim();
17-
var name = $('h1.ak-return-link').text().trim();
18-
var imgUrl = $('div.ak-encyclo-detail-illu').find('img').attr('src').replace('dofus/ng/img/../../../', '');
19-
20-
var item = {
21-
id: itemId,
22-
name: name,
23-
lvl: '60',
24-
type: type,
25-
imgUrl: imgUrl,
26-
url: url
27-
}
28-
18+
/////// Global initializations ///////
19+
body = $.html();
20+
21+
/////// Description parse ///////
22+
item = descriptionParse(body, url);
23+
item.lvl = 60;
24+
25+
/////// Tabs initializations ///////
2926
item["stats"] = [];
3027
item["characteristic"] = [];
31-
$('div.ak-encyclo-detail').find('div.ak-rides-details').find('div.row').eq(1).find('div.col-md-6').eq(0).find('div.ak-list-element').each(function(i, element){
32-
var stat = $(this).find( "div.ak-title" ).text().trim();
33-
var element = getElement(stat);
34-
element = element.charAt(0).toUpperCase() + element.slice(1);
35-
var numbers = [];
36-
stat.replace(/(-?\d[\d\.]*)/g, function( x ) {
37-
var n = Number(x); if (x == n) { numbers.push(x); }
28+
29+
/////// Effects & conditions parse ///////
30+
var $akContainer = $('div.ak-rides-details').eq(0).find('div.row');
31+
if(typeof $akContainer.eq(1) !== 'undefined') {
32+
$akContainer.eq(1).find('div.col-md-6').each(function(i, element){
33+
var infoCategory = $(this).find('div.ak-panel-title').text().trim().toLowerCase().normalize('NFD').replace(/[\u0300-\u036f]/g, "");
34+
categorySwitch(infoCategory, $(this).html());
3835
});
39-
var groupeElement = {[element]: {'from': numbers[0], 'to': numbers[1]}};
40-
item["stats"].push(groupeElement);
41-
});
42-
$('div.ak-encyclo-detail').find('div.ak-rides-details').find('div.row').eq(1).find('div.col-md-6').eq(1).find('div.ak-list-element').each(function(i, element){
43-
var spanTxt = $(this).find( "div.ak-title" ).find("span").text();
44-
$(this).find( "div.ak-title" ).find("span").remove();
45-
var characteristic = $(this).find( "div.ak-title" ).text().trim() + ' ' + spanTxt;
46-
var element = characteristic.substring(0, characteristic.indexOf(":")).trim();
47-
var subElement = characteristic.substring(characteristic.indexOf(":")+1, characteristic.length).trim();
48-
var groupeElement = {[element]: subElement};
49-
item["characteristic"].push(groupeElement);
50-
});
36+
}
5137
return item;
5238
});
39+
}
40+
41+
/**
 * Reads the characteristic rows of a mount panel and appends one
 * single-key object per row to the module-level `item["characteristic"]`.
 *
 * For each `div.ak-list-element` in the first `div.ak-container.ak-panel`,
 * the text of the title's `<span>` elements is moved to the end of the title
 * text; the part before the first ':' becomes the key and the remainder
 * becomes the value.
 *
 * @param {string} body - HTML markup of the characteristics panel.
 * @returns {undefined} Rows are pushed onto `item["characteristic"]`.
 */
function characteristicParse(body) {
    const $ = cheerio.load(body);
    const $rows = $('div.ak-container.ak-panel').eq(0).find('div.ak-list-element');

    $rows.each(function () {
        const $title = $(this).find('div.ak-title');
        const $spans = $title.find('span');

        // Capture the span text first, then detach the spans so the title
        // text can be read without them.
        const spanText = $spans.text();
        $spans.remove();

        const line = $title.text().trim() + ' ' + spanText;
        const colonAt = line.indexOf(':');
        const label = line.substring(0, colonAt).trim();
        const value = line.substring(colonAt + 1, line.length).trim();

        item["characteristic"].push({ [label]: value });
    });
}
53+
54+
/**
 * Dispatches one info panel of a mount page to the parser that matches the
 * panel's title.
 *
 * Results are written to the module-level `item`: an effects panel sets
 * `item["stats"]` to the return value of `effectParse`, and a
 * characteristics panel is handed to `characteristicParse`. Any other title
 * is reported on the console.
 *
 * @param {string} infoCategory - Panel title; the caller passes it trimmed,
 *   lower-cased and stripped of combining accents.
 * @param {string} body - HTML markup of the panel.
 * @returns {undefined}
 */
function categorySwitch(infoCategory, body) {
    switch (infoCategory) {
        // French and English page titles share the same parser.
        case 'effets':
        case 'effects':
            item["stats"] = effectParse(body);
            break;
        case 'characteristics':
        case 'caracteristiques':
            characteristicParse(body);
            break;
        default:
            // This statement used to sit after the last `break` without a
            // `default:` label, which made it unreachable.
            console.log('Sorry, we are out of ' + infoCategory + '.');
    }
}

0 commit comments

Comments
 (0)