11#! /usr/bin/env node
22
3- const flow = require ( "xml-flow" )
4- const https = require ( "https" )
5- const path = require ( "path" )
6- const tar = require ( "tar" )
7- const fs = require ( "fs" )
8-
9- const api = process . argv . slice ( 2 ) . shift ( ) ||
10- "https://data.public.lu/api/1/datasets/letzebuerger-online-dictionnaire/"
3+ const flow = require ( 'xml-flow' )
4+ const https = require ( 'https' )
5+ const path = require ( 'path' )
6+ const tar = require ( 'tar' )
7+ const fs = require ( 'fs' )
118
// Dataset API endpoint: the first command-line argument when one is given
// (and non-empty), otherwise the public LOD dataset URL below.
const api =
  process.argv[2] ||
  'https://data.public.lu/api/1/datasets/letzebuerger-online-dictionnaire/'
1212
// Matches archive entry paths that end with the ".xml" extension.
const regex = /\.xml$/

// Run-wide bookkeeping, printed by feedBack when the run ends.
const infos = {
  noAudio: [], // ids of items that carry no audio payload (filled by parse)
  smallFiles: [], // ids whose decoded audio is under 1000 bytes (filled by writeAudio)
  writeFail: [], // presumably ids whose file write failed -- the writer is not visible here
  countAudio: 0, // reported by feedBack as "Mp3 files"
  countJson: 0 // reported by feedBack as "Json files"
}
@@ -21,56 +21,56 @@ const getURLfromAPI = () => {
2121 https
2222 . get ( api , ( resp ) => {
2323 if ( resp . statusCode !== 200 )
24- return reject ( new Error ( resp . statusCode + " : " + resp . statusMessage ) )
25- let body = ""
26- resp . on ( " data" , ( data ) => ( body += data ) )
27- resp . on ( " end" , ( ) => {
24+ return reject ( new Error ( resp . statusCode + ' : ' + resp . statusMessage ) )
25+ let body = ''
26+ resp . on ( ' data' , ( data ) => ( body += data ) )
27+ resp . on ( ' end' , ( ) => {
2828 try {
2929 body = JSON . parse ( body )
3030 } catch ( err ) {
3131 reject ( err )
3232 }
3333 let [ resources ] = body . resources || [ ]
34- if ( resources && " url" in resources ) resolve ( resources . url )
35- else reject ( new Error ( " URL ressource not found" ) )
34+ if ( resources && ' url' in resources ) resolve ( resources . url )
35+ else reject ( new Error ( ' URL ressource not found' ) )
3636 } )
3737 } )
38- . on ( " error" , ( err ) => reject ( err ) )
38+ . on ( ' error' , ( err ) => reject ( err ) )
3939 } )
4040}
4141
/**
 * Creates the output directory tree under the current working directory:
 * `<cwd>/<distFolder>` plus its `audio` and `json` sub-directories.
 *
 * The two sub-directory paths are stored in `audioFolder` and `jsonFolder`,
 * which are not local to this function (writeAudio reads `audioFolder`);
 * their declaration is not visible in this section of the file.
 *
 * @param {string} [distFolder='dist'] Output folder name, relative to cwd.
 * @throws {Error} Whatever fs.mkdirSync throws, e.g. when a path exists but
 *   is not a directory.
 */
const createFolders = (distFolder = 'dist') => {
  // Keep the parameter untouched; work on a resolved copy instead.
  const root = path.join(process.cwd(), distFolder)
  audioFolder = path.join(root, 'audio')
  jsonFolder = path.join(root, 'json')
  // With `recursive: true`, mkdirSync succeeds silently when the directory
  // already exists, so no separate existence check (and no check-then-create
  // race) is needed.
  for (const folder of [root, audioFolder, jsonFolder])
    fs.mkdirSync(folder, { recursive: true })
}
5050
/**
 * Requests the archive at `url` and pipes the response through `tar.t()`,
 * handing every entry whose path matches `regex` to `parse`.
 *
 * Request-level errors are logged with console.error, matching how `parse`
 * reports stream errors, instead of surfacing as an unhandled 'error' event.
 *
 * @param {string} url Location of the tar archive to read.
 * @returns The request object returned by `https.get`.
 */
const extract = (url) =>
  https
    .get(url, (resp) => {
      console.info(`Extracting from : ${url}`, '\n')
      // Each 'entry' event carries one archive member; only XML members
      // are forwarded to the parser.
      resp.pipe(tar.t()).on('entry', (entry) => {
        if (regex.test(entry.path)) parse(entry)
      })
    })
    .on('error', (err) => console.error(err))
5858
/**
 * Feeds one archive entry to the `flow` XML parser and processes each
 * `lod:item` tag it emits:
 *   - reports progress for the item id,
 *   - writes the item's audio via `writeAudio` when the item has a
 *     `lod:audio` object containing `$text`, otherwise records the id in
 *     `infos.noAudio`,
 *   - removes `lod:audio` from the item and writes the rest via `writeJson`.
 *
 * Parser errors are logged; `feedBack` runs when the stream ends.
 *
 * @param entry Archive entry; must expose `path` and be readable by `flow`.
 * @returns The emitter returned by `flow(entry)`, with listeners attached.
 */
const parse = (entry) => {
  console.info(`Parsing from : ${entry.path}`, '\n')
  return flow(entry)
    .on('tag:lod:item', (item) => {
      const id = item['lod:meta']['lod:id']
      printProgress(id)
      const audioNode = item['lod:audio']
      // Guard the `in` test: it throws a TypeError on non-object operands,
      // so a missing, null or plain-text audio node is counted as "no audio".
      if (
        audioNode !== null &&
        typeof audioNode === 'object' &&
        '$text' in audioNode
      ) {
        writeAudio(id, audioNode['$text'])
      } else infos.noAudio.push(id)
      // Drop the audio payload before the item is passed to writeJson.
      delete item['lod:audio']
      writeJson(id, item)
    })
    .on('error', (err) => console.error(err))
    .on('end', feedBack)
}
7575
7676const writeJson = ( id , item ) => {
@@ -87,7 +87,7 @@ const writeJson = (id, item) => {
8787const writeAudio = ( id , data ) => {
8888 const filename = `${ id } .mp3`
8989 const audioPath = path . join ( audioFolder , filename )
90- const buff = new Buffer . from ( data , " base64" )
90+ const buff = new Buffer . from ( data , ' base64' )
9191 if ( buff . length < 1000 ) infos . smallFiles . push ( id )
9292 try {
9393 fs . writeFileSync ( audioPath , buff )
@@ -108,19 +108,19 @@ const feedBack = () => {
108108 const time = new Date ( hrend [ 0 ] * 1000 ) . toISOString ( ) . substr ( 11 , 8 )
109109 process . stdout . cursorTo ( 0 )
110110 process . stdout . clearLine ( )
111- console . info ( " ⦿ Execution time : %s" , time )
112- console . info ( " √ Json files : %s" , infos . countJson )
113- console . info ( " √ Mp3 files : %s" , infos . countAudio )
114- console . info ( " ☓ Items without audio : " , infos . noAudio . length , infos . noAudio )
115- console . info ( " ⁈ Files very small : " , infos . smallFiles . length , infos . smallFiles , "\n" )
111+ console . info ( ' ⦿ Execution time : %s' , time )
112+ console . info ( ' √ Json files : %s' , infos . countJson )
113+ console . info ( ' √ Mp3 files : %s' , infos . countAudio )
114+ console . info ( ' ☓ Items without audio : ' , infos . noAudio . length , infos . noAudio )
115+ console . info ( ' ⁈ Files very small : ' , infos . smallFiles . length , infos . smallFiles , '\n' )
116116 process . exit ( )
117117}
118118
119119const main = ( ) => {
120- process . on ( " SIGINT" , feedBack )
120+ process . on ( ' SIGINT' , feedBack )
121121 getURLfromAPI ( )
122122 . then ( ( url ) => {
123- createFolders ( path . basename ( url ) . replace ( " .tar.gz" , "" ) )
123+ createFolders ( path . basename ( url ) . replace ( ' .tar.gz' , '' ) )
124124 extract ( url )
125125 } )
126126 . catch ( console . error )
0 commit comments