@@ -12,8 +12,7 @@ var CLI = require('clui'),
 var url;
 var globalUrl;
 var itemCategory;
-var currentPage = 1;
-var pageslinks = [];
+var maxItem = 1;
 var requestOpts = {
     url: '',
     method: 'GET',
@@ -26,105 +25,75 @@ main();
 
 function main() {
     asciiArt();
-    cm.getCategory().then(
-        function(cmdResponse) {
-            crawlerInit(cmdResponse);
-        }).catch(
-        function(err) {
-            console.log('\x1b[31m%s\x1b[0m', '/!\\Broken promise from cmd');
-            console.log(err);
-            process.exit();
-        });
+    var ifResume = fs.existsSync('./data/links/resume.json') ? true : false;
+    cm.getCategory(ifResume).then(
+        function(cmdResponse) {
+            if (cmdResponse == "yes") resumeLastParse();
+            else crawlerInit(cmdResponse);
+        }).catch(
+        function(err) {
+            console.log('\x1b[31m%s\x1b[0m', '/!\\Broken promise from cmd resume');
+            console.log(err);
+            process.exit();
+        });
 }
 
 function crawlerInit(cmdResponse) {
     cmdResponse = JSON.parse(cmdResponse);
     var countdown = new Spinner('Crawler in progress... It could take some time ', ['⣾', '⣽', '⣻', '⢿', '⡿', '⣟', '⣯', '⣷']);
     countdown.start();
-    maxPage = cmdResponse.pages;
-    currentPage = cmdResponse.fromPage;
+    maxItem = cmdResponse.maxItem;
     itemCategory = cmdResponse.category;
     cmdResponse.language == 'french' ? url = sw.cmdSwitch(itemCategory) : url = sw.cmdSwitchEn(itemCategory);
     cmdResponse.game == 'dofus' ? url = url : url = url.replace('https://www.dofus.com', 'https://www.dofus-touch.com');
     globalUrl = url.substring(0, url.indexOf(".com/") + 4);
     requestOpts.url = url;
-    var realMaxPagePromise = request(requestOpts).then(function($) {
-        var realMaxPage = $('div.text-center.ak-pagination.hidden-xs').find('ul.ak-pagination.pagination.ak-ajaxloader li:last-child').prev().prev().text().trim();
-        return realMaxPage;
-    });
-    getAllLinks(realMaxPagePromise);
-}
-
-function getAllLinks(realMaxPagePromise) {
-    realMaxPagePromise.then(
-        function(realMaxPage) {
-            if (realMaxPage == '') {
-                realMaxPage = 1;
-            }
-            if (realMaxPage >= maxPage && currentPage <= maxPage) {
-                var callback = function(values) {
-                    pageslinks.push(values);
-                    currentPage++;
-                    if (currentPage <= maxPage) {
-                        getPageLinks(currentPage, callback);
-                    } else {
-                        pageslinks = concatToOneArray(pageslinks);
-                        console.log('\x1b[36m%s\x1b[0m', '\n SUCCESS : all item(s) links crawled.');
-                        console.log('\x1b[36m%s\x1b[0m', '\n START of item(s) crawling.');
-                        getItems(pageslinks);
-                    }
-                }
-                getPageLinks(currentPage, callback);
-            } else {
-                console.log('\x1b[31m%s\x1b[0m', '\n /!\\ Max page of this category is ' + realMaxPage + ' so ' + maxPage + ' is to much :(');
-                process.exit();
-            }
-
-        }).catch(
-        function(err) {
-            console.log(err);
-            console.log('\x1b[31m%s\x1b[0m', '/!\\Broken promise from getAllLinks');
-            process.exit();
-        });
+    getPageLinks();
 }
 
-function getPageLinks(currentPage, callback) {
-    requestOpts.url = url + 'page=' + currentPage;
+function getPageLinks() {
     return request(requestOpts).then(function($) {
         var links = [];
         $('tbody').find('tr').each(function(i, tr) {
+            if (i >= maxItem) return false;
             var link = globalUrl + $(this).find('td').eq(1).find('a').attr('href');
             links.push(link);
         });
         return links;
-    }).then(function(val) {
-        callback(val);
+    }).then(function(links) {
+        fsPath.writeFile('./data/links/' + itemCategory + '_links.json', JSON.stringify(links), function(err) {
+            if (err) console.log(err);
+            console.log('\x1b[36m%s\x1b[0m', '\n SUCCESS : all item(s) links crawled.');
+            console.log('\x1b[36m%s\x1b[0m', '\n START of item(s) crawling.');
+            getItems(itemCategory, getLinksFromFile());
+        });
     }).catch(function(err) {
         console.log('\x1b[31m%s\x1b[0m', '/!\\Broken promise from getPageLinks');
         console.log(err);
         process.exit();
     });
 }
 
-function getItems(pageslinks) {
-    gi.getItems(pageslinks, function(items) {
-        itemCategory = itemCategory.replace(/ /g, '');
-        fsPath.writeFile('./data/' + itemCategory + '.json', JSON.stringify(items), function(err) {
+function getItems(category, links) {
+    gi.getItems(category, links, function(items) {
+        category = category.replace(/ /g, '');
+        fsPath.writeFile('./data/' + category + '.json', JSON.stringify(items), function(err) {
             if (err) console.log(err);
-            console.log('\x1b[32m%s\x1b[0m', '\n SUCCESS : ' + pageslinks.length + ' item(s) were crawled.');
-            console.log('\x1b[33m%s\x1b[0m', 'File ' + itemCategory + '.json' + ' was generated under "data/" folder.');
+            console.log('\x1b[32m%s\x1b[0m', '\n SUCCESS : ' + items.length + ' item(s) were crawled.');
+            console.log('\x1b[33m%s\x1b[0m', 'File ' + category + '.json' + ' was generated under "data/" folder.');
             process.exit();
         });
     });
 }
 
-function concatToOneArray(arrToConvert) {
-    var newArr = [];
-    for (var i = 0; i < arrToConvert.length; i++) {
-        newArr = newArr.concat(arrToConvert[i]);
-    }
-    const noDuplicateItemArray = newArr.filter((val, id, array) => array.indexOf(val) == id);
-    return noDuplicateItemArray;
+function getLinksFromFile() {
+    var links = JSON.parse(fs.readFileSync('./data/links/' + itemCategory + '_links.json', 'utf8'));
+    return links;
+}
+
+function resumeLastParse() {
+    var resume = JSON.parse(fs.readFileSync('./data/links/resume.json', 'utf8'));
+    getItems(resume.category, resume.links);
 }
 
 function asciiArt() {