11import { resolve , join , posix , dirname , basename } from 'node:path'
2- import { writeFileSync , statSync , existsSync , mkdirSync } from 'node:fs'
2+ import { statSync , existsSync , mkdirSync } from 'node:fs'
3+ import { writeFile } from 'node:fs/promises'
34import { connect } from '@existdb/node-exist'
5+ import Bottleneck from 'bottleneck'
46
57/**
68 * @typedef { import("@existdb/node-exist").NodeExist } NodeExist
@@ -14,6 +16,8 @@ import { connect } from '@existdb/node-exist'
1416 * @prop {Number } depth how many levels to traverse down for recursive and tree views
1517 * @prop {String[] } include filter items
1618 * @prop {String[] } exclude filter items
19+ * @prop {Number } threads How many resources should be downloaded at the same time
20+ * @prop {Number } mintime How long a download should take at least
1721 */
1822
1923/**
@@ -57,10 +61,7 @@ const xmlBooleanSetting = {
5761 return value
5862 }
5963}
60- const serializationOptionNames = [
61- 'insert-final-newline' ,
62- 'omit-xml-declaration'
63- ]
64+ const serializationOptionNames = [ 'insert-final-newline' , 'omit-xml-declaration' ]
6465
6566const serializationDefaults = {
6667 // "exist:indent": "no",
@@ -71,7 +72,7 @@ const serializationDefaults = {
7172
7273function getSerializationOptions ( options ) {
7374 const serializationOptions = serializationDefaults
74- serializationOptionNames . forEach ( o => {
75+ serializationOptionNames . forEach ( ( o ) => {
7576 if ( o in options ) {
7677 serializationOptions [ o ] = options [ o ]
7778 }
@@ -83,6 +84,7 @@ function getSerializationOptions (options) {
8384/**
8485 * Download a single resource into an existdb instance
8586 * @param {NodeExist.BoundModules } db NodeExist client
87+ * @param {GetOptions } options
8688 * @param {Boolean } verbose
8789 * @param {ResourceInfo } resource
8890 * @param {String } directory
@@ -100,7 +102,7 @@ async function downloadResource (db, options, resource, directory, collection, r
100102 }
101103 const localName = rename || posix . basename ( resource . name )
102104 const localPath = join ( directory , localName )
103- await writeFileSync ( localPath , fileContents )
105+ await writeFile ( localPath , fileContents )
104106
105107 if ( verbose ) {
106108 console . log ( `✔︎ downloaded resource ${ localPath } ` )
@@ -115,11 +117,13 @@ async function downloadResource (db, options, resource, directory, collection, r
115117/**
116118 * download a collection from an existdb instance
117119 * @param {NodeExist } db NodeExist client
120+ * @param {GetOptions } options
118121 * @param {boolean } verbose
119122 * @param {String } collection
120123 * @param {String } baseCollection
124+ * @param {Bottleneck } limiter
121125 */
122- async function downloadCollection ( db , options , collection , baseCollection , directory ) {
126+ async function downloadCollection ( db , options , collection , baseCollection , directory , limiter ) {
123127 const absCollection = posix . join ( baseCollection , collection )
124128 const { verbose } = options
125129 try {
@@ -134,12 +138,21 @@ async function downloadCollection (db, options, collection, baseCollection, dire
134138 }
135139
136140 const targetDir = posix . join ( directory , collection )
137- await collectionMeta . documents . forEach (
138- async resource => downloadResource ( db , options , resource , targetDir , absCollection ) )
141+ // Download all documents. Do this in parallel, but not everything at once. Pool that work so we don't take down the
142+ // server
143+ await Promise . all (
144+ collectionMeta . documents . map ( async ( resource ) => {
145+ await limiter . schedule ( ( ) => downloadResource ( db , options , resource , targetDir , absCollection ) )
146+ } )
147+ )
139148
140149 // recursive (optional?)
141- await collectionMeta . collections . forEach (
142- async collection => downloadCollection ( db , options , collection , absCollection , targetDir ) )
150+
151+ // There should always be fewer collections than resources, so no need for pooling. Go over them one by one. No need
152+ // to do this in parallel
153+ for ( const collection of collectionMeta . collections ) {
154+ await downloadCollection ( db , options , collection , absCollection , targetDir , limiter )
155+ }
143156
144157 return true
145158 } catch ( e ) {
@@ -189,12 +202,13 @@ async function getPathInfo (db, path) {
189202 */
190203async function downloadCollectionOrResource ( db , source , target , options ) {
191204 // read parameters
192- // const start = Date.now()
205+ // const start = Date.now()
193206 const root = resolve ( target )
194207
195208 if ( options . verbose ) {
196209 console . log ( 'Downloading:' , source , 'to' , root )
197- console . log ( 'Server:' ,
210+ console . log (
211+ 'Server:' ,
198212 ( db . client . isSecure ? 'https' : 'http' ) + '://' + db . client . options . host + ':' + db . client . options . port ,
199213 '(v' + options . version + ')'
200214 )
@@ -205,6 +219,7 @@ async function downloadCollectionOrResource (db, source, target, options) {
205219 if ( options . exclude . length ) {
206220 console . log ( 'Exclude:\n' , ...options . exclude , '\n' )
207221 }
222+ console . log ( `Downloading up to ${ options . threads } resources at a time` )
208223 }
209224
210225 // initial file
@@ -260,10 +275,13 @@ async function downloadCollectionOrResource (db, source, target, options) {
260275 throw Error ( `${ source } is a collection but ${ root } is not a directory` )
261276 }
262277
278+ const limiter = new Bottleneck ( {
279+ maxConcurrent : options . threads ,
280+ minTime : options . mintime
281+ } )
282+
263283 // download collection into a folder
264- return await downloadCollection ( db , options ,
265- posix . basename ( info . name ) ,
266- posix . dirname ( info . name ) , root )
284+ return await downloadCollection ( db , options , posix . basename ( info . name ) , posix . dirname ( info . name ) , root , limiter )
267285}
268286
269287export const command = [ 'get [options] <source> <target>' , 'download' , 'fetch' ]
@@ -301,14 +319,34 @@ export function builder (yargs) {
301319 type : 'boolean' ,
302320 default : false
303321 } )
322+ . option ( 't' , {
323+ alias : 'threads' ,
324+ describe : 'The maximum number of concurrent threads that will be used to download data' ,
325+ type : 'number' ,
326+ default : 4
327+ } )
328+ . option ( 'm' , {
329+ alias : 'mintime' ,
330+ describe : 'The minimum time each download will take' ,
331+ type : 'number' ,
332+ default : 0
333+ } )
304334 . nargs ( { i : 1 , e : 1 } )
305335}
306336
307337export async function handler ( argv ) {
308338 if ( argv . help ) {
309339 return 0
310340 }
311- const { source } = argv
341+
342+ const { threads, mintime, source } = argv
343+
344+ if ( typeof mintime !== 'number' || mintime < 0 ) {
345+ throw Error ( 'Invalid value for option "mintime"; must be an integer equal or greater than zero.' )
346+ }
347+ if ( typeof threads !== 'number' || threads <= 0 ) {
348+ throw Error ( 'Invalid value for option "threads"; must be an integer greater than zero.' )
349+ }
312350
313351 const target = argv . target ? argv . target : '.'
314352
0 commit comments