@@ -5,10 +5,19 @@ import getItems from './getItems';
55import { gameUrlListSwitcher } from './services/utils' ;
66import { Spinner } from 'clui' ;
77import errorHandler from '../../services/errorHandler' ;
8- import { mongoImport , requestOpts } from './services/utils' ;
8+ import { mongoImport , requestOpts , parseEachPageLinksClass , maxItemControl } from './services/utils' ;
99
10- let url = null , globalUrl , itemCategory , game , maxItem = 1 , all = true ;
11- const NB_ITEM_PER_PAGE = 96 , links = [ ] ;
10+ const NB_ITEM_PER_PAGE = 96 ; // page size of a listing; used in getPagesLinks to derive how many pages to request
11+ const initData = { // shared mutable crawl state: filled by parserInit, then read and updated by the link-crawling functions
12+ url : null , // listing URL returned by gameUrlListSwitcher
13+ globalUrl : null , // prefix of url up to and including ".com", prepended to each item href
14+ itemCategory : null , // category being crawled; also used in the name of the links output file
15+ game : null ,
16+ maxItem : null , // requested item count, later overwritten with the result of maxItemControl
17+ all : true , // when true, parseEachPageLinks does not stop at maxItem
18+ links : [ ] , // collected item links, written out as JSON by getPagesLinks
19+ $ : null // result of the first page request (stored by getPagesLinks)
20+ } ;
1221
1322/**
1423 * Function which initializes the parsing process (global variables, URLs, category to parse...)
@@ -18,31 +27,33 @@ function parserInit(cmdResponse) {
1827 const countdown = new Spinner ( 'Crawler in progress... It could take some time ' , [ '⣾' , '⣽' , '⣻' , '⢿' , '⡿' , '⣟' , '⣯' , '⣷' ] ) ; // CLI spinner (from 'clui') displayed while crawling
1928 countdown . start ( ) ; // NOTE(review): countdown is local and is not stopped anywhere in this function
2029
21- game = cmdResponse . game ;
22- maxItem = cmdResponse . maxItem ;
23- all = cmdResponse . all ;
24- itemCategory = cmdResponse . category ;
25- url = gameUrlListSwitcher ( cmdResponse ) ;
26- globalUrl = url . substring ( 0 , url . indexOf ( '.com/' ) + 4 ) ;
27- requestOpts . url = url ;
30+ initData . game = cmdResponse . game ; // copy the command-line answers into the shared crawl state
31+ initData . maxItem = cmdResponse . maxItem ;
32+ initData . all = cmdResponse . all ;
33+ initData . itemCategory = cmdResponse . category ;
34+ initData . url = gameUrlListSwitcher ( cmdResponse ) ; // listing URL chosen from the whole cmdResponse
35+ initData . globalUrl = initData . url . substring ( 0 , initData . url . indexOf ( '.com/' ) + 4 ) ; // keep everything up to and including ".com"; NOTE(review): if '.com/' is absent, indexOf gives -1 and only the first 3 characters are kept
36+ requestOpts . url = initData . url ; // point the shared request options at the first listing page
2837}
2938
3039/**
3140 * Fetches the links of every item to crawl and writes them as JSON to ./data/links/<category>_links.json
3241 */
3342async function getPagesLinks ( ) {
34- const $ = await request ( requestOpts ) . catch ( err => new errorHandler ( err ) ) ;
35- // In order to prevent from empty value of realMaxItem when there is only 1 page
36- const realMaxItem = Number ( $ ( 'div.ak-list-info > strong' ) . text ( ) ) === 0 ? $ ( 'div.ak-container.ak-panel.main-object-list.ak-nocontentpadding tbody > tr' ) . length : Number ( $ ( 'div.ak-list-info > strong' ) . text ( ) ) ;
37- if ( maxItem >= realMaxItem || all === true ) maxItem = realMaxItem ; // security line to avoid overpasing the real amont of items & to take care about 'all' value if true
38-
39- const nbPageToParse = Math . ceil ( maxItem / NB_ITEM_PER_PAGE ) ;
40- for ( let page = 1 ; page <= nbPageToParse ; page ++ ) {
41- requestOpts . url = url + '&page=' + page ;
42- const $ = await request ( requestOpts ) . catch ( err => new errorHandler ( err ) ) ;
43- parseEachPageLinks ( $ ) ;
43+ initData . $ = await request ( requestOpts ) . catch ( err => new errorHandler ( err ) ) ; // NOTE(review): on rejection, .catch resolves to the new errorHandler instance, which is what gets stored in initData.$ — confirm errorHandler stops the run
44+ initData . maxItem = maxItemControl ( initData ) ; // Clamp maxItem: guards against an empty item count when there is only one page and against exceeding the real number of items (logic presumably moved from the removed lines above into maxItemControl — confirm in ./services/utils)
45+ if ( initData . itemCategory == 'classe' ) { // 'classe' is not a regular encyclopedia listing: links come from parseEachPageLinksClass, with no page loop
46+ initData . links = initData . links . concat ( parseEachPageLinksClass ( initData ) ) ;
47+ }
48+ else {
49+ const nbPageToParse = Math . ceil ( initData . maxItem / NB_ITEM_PER_PAGE ) ; // number of listing pages needed to cover maxItem items
50+ for ( let page = 1 ; page <= nbPageToParse ; page ++ ) {
51+ requestOpts . url = initData . url + '&page=' + page ; // mutates the requestOpts object imported from ./services/utils
52+ const $ = await request ( requestOpts ) . catch ( err => new errorHandler ( err ) ) ; // pages are requested one at a time, in order
53+ parseEachPageLinks ( $ ) ; // appends this page's links to initData.links
54+ }
4455 }
45- writeFileSync ( './data/links/' + itemCategory + '_links.json' , JSON . stringify ( links ) ) ;
56+ writeFileSync ( './data/links/' + initData . itemCategory + '_links.json' , JSON . stringify ( initData . links ) ) ; // persist the collected links for the item-crawling step announced below
4657 console . log ( '\x1b[36m%s\x1b[0m' , '\n SUCCESS : all item(s) links crawled.' ) ;
4758 console . log ( '\x1b[36m%s\x1b[0m' , '\n START of item(s) crawling.' ) ;
4859}
@@ -52,20 +63,11 @@ async function getPagesLinks() {
5263 * @param {Function} $ handle on the loaded HTML page to analyze (called as $(selector))
5364 */
5465function parseEachPageLinks ( $ ) {
55- // warning, classe is not a classic encyclopedia
56- if ( itemCategory == 'classe' ) {
57- $ ( '.ak-content-sections' ) . find ( '.ak-section' ) . each ( function ( i , div ) {
58- if ( ! all ) if ( links . length >= maxItem ) return false ;
59- const link = globalUrl + $ ( this ) . find ( 'a' ) . attr ( 'href' ) ;
60- links . push ( link ) ;
61- } ) ;
62- } else {
63- $ ( 'tbody' ) . find ( 'tr' ) . each ( function ( i , tr ) {
64- if ( ! all ) if ( links . length >= maxItem ) return false ;
65- const link = globalUrl + $ ( this ) . find ( 'td' ) . eq ( 1 ) . find ( 'a' ) . attr ( 'href' ) ;
66- links . push ( link ) ;
67- } ) ;
68- }
66+ $ ( 'tbody' ) . find ( 'tr' ) . each ( function ( i , tr ) { // visit every table row of the page; the 'classe' case is no longer handled here
67+ if ( ! initData . all ) if ( initData . links . length >= initData . maxItem ) return false ; // unless 'all' is set, bail out once maxItem links are collected (returning false presumably stops .each(), as in jQuery/cheerio — confirm)
68+ const link = initData . globalUrl + $ ( this ) . find ( 'td' ) . eq ( 1 ) . find ( 'a' ) . attr ( 'href' ) ; // href of the anchor in the row's second cell, prefixed with the site root; NOTE(review): a row without such an anchor would presumably yield a link ending in "undefined" — confirm
69+ initData . links . push ( link ) ;
70+ } ) ;
6971}
7072
7173/**
0 commit comments