@@ -91,6 +91,7 @@ interface Config {
9191 host ?: HostType ;
9292 outputDir ?: string ;
9393 maxPosts ?: number ;
94+ maxConcurrentDownloads ?: number ;
9495 proxies ?: ProxyConfig [ ] ;
9596 proxyRotation ?: ProxyRotationMode ;
9697 // List of creators to scrape
@@ -113,6 +114,15 @@ async function loadConfig(configPath: string): Promise<Config> {
113114 // Normalize and validate proxy configuration
114115 config . proxies = Array . isArray ( config . proxies ) ? config . proxies : [ ] ;
115116 config . proxyRotation = config . proxyRotation || 'round_robin' ;
117+
// Validate maxConcurrentDownloads if provided.
// The value is a concurrency limit, so it must be a whole number in [1, 10].
if (config.maxConcurrentDownloads !== undefined) {
  const limit = config.maxConcurrentDownloads;
  // Number.isInteger returns false for non-number values, NaN, Infinity and
  // fractional numbers, so one check covers the type test and the integer test.
  if (!Number.isInteger(limit) || limit < 1 || limit > 10) {
    throw new Error('maxConcurrentDownloads must be an integer between 1 and 10');
  }
}
116126
117127 for ( const proxy of config . proxies ) {
118128 if ( ! proxy || typeof proxy !== 'object' ) {
@@ -200,11 +210,21 @@ const argv = yargs(hideBin(process.argv))
200210 description : 'Maximum number of posts to fetch (0 = unlimited, default: 5000)' ,
201211 default : 5000 ,
202212 } )
213+ . option ( 'maxConcurrentDownloads' , {
214+ alias : 'd' ,
215+ type : 'number' ,
216+ description : 'Maximum concurrent downloads (1-10, default: 2)' ,
217+ default : 2 ,
218+ } )
.check((argv) => {
  // Either config file or service+userId must be provided
  if (!argv.config && (!argv.service || !argv.userId)) {
    throw new Error('Either --config or both --service and --userId must be provided');
  }
  // Validate maxConcurrentDownloads: it must be a whole number in [1, 10].
  // A plain `< 1 || > 10` test lets NaN through (both comparisons are false),
  // so a non-numeric CLI value would pass validation. Number.isInteger rejects
  // NaN, Infinity and fractional values in one check.
  const limit = argv.maxConcurrentDownloads;
  if (!Number.isInteger(limit) || limit < 1 || limit > 10) {
    throw new Error('maxConcurrentDownloads must be an integer between 1 and 10');
  }
  return true;
})
210230 . help ( )
@@ -215,7 +235,6 @@ const argv = yargs(hideBin(process.argv))
215235
216236// Constants
217237const PAGE_SIZE = 50 ;
218- const MAX_CONCURRENT_DOWNLOADS = 2 ;
219238const MAX_DOWNLOAD_RETRIES = 3 ;
220239const DOWNLOAD_RETRY_WAIT_SECONDS = 10000 ;
221240const REQUEST_TIMEOUT_MS = 120000 ; // Abort the HTTP request itself if it never responds
@@ -247,6 +266,7 @@ interface ScraperContext {
247266 host : HostType ;
248267 outputDir : string ;
249268 maxPosts : number ;
269+ maxConcurrentDownloads : number ;
250270 baseDomain : string ;
251271 subdomains : string [ ] ;
252272 blacklistFile : string ;
@@ -280,6 +300,7 @@ function createScraperContext(
280300 host : HostType ,
281301 outputDir : string ,
282302 maxPosts : number ,
303+ maxConcurrentDownloads : number ,
283304 downloadBars : MultiProgressBars ,
284305 proxyManager : ProxyManager | null
285306) : ScraperContext {
@@ -292,6 +313,7 @@ function createScraperContext(
292313 host,
293314 outputDir : resolvedOutputDir ,
294315 maxPosts,
316+ maxConcurrentDownloads,
295317 baseDomain,
296318 subdomains,
297319 blacklistFile : path . join ( resolvedOutputDir , 'blacklist.json' ) ,
@@ -1025,7 +1047,7 @@ interface DownloadResult {
10251047}
10261048
10271049async function downloadFiles ( ctx : ScraperContext , downloadQueue : DownloadQueueEntry [ ] , totalFiles : number , completedSoFar : number ) : Promise < DownloadResult > {
1028- const queue = new AsyncQueue ( { limit : MAX_CONCURRENT_DOWNLOADS } ) ;
1050+ const queue = new AsyncQueue ( { limit : ctx . maxConcurrentDownloads } ) ;
10291051 const failedDownloads : DownloadQueueEntry [ ] = [ ] ;
10301052
10311053 return new Promise < DownloadResult > ( ( resolve , reject ) => {
@@ -1376,12 +1398,13 @@ async function scrapeCreator(ctx: ScraperContext): Promise<void> {
13761398 const host = creator . host || config . host || ( argv . host as HostType ) ;
13771399 const outputDir = creator . outputDir || config . outputDir || argv . outputDir ;
13781400 const maxPosts = creator . maxPosts ?? config . maxPosts ?? argv . maxPosts ;
1401+ const maxConcurrentDownloads = config . maxConcurrentDownloads ?? argv . maxConcurrentDownloads ?? 2 ;
13791402
13801403 console . log ( chalk . magenta ( `\n${ '=' . repeat ( 60 ) } ` ) ) ;
13811404 console . log ( chalk . magenta ( `[${ creatorNum } /${ config . creators . length } ] Scraping ${ service } /${ userId } ` ) ) ;
13821405 console . log ( chalk . magenta ( `${ '=' . repeat ( 60 ) } \n` ) ) ;
13831406
1384- const ctx = createScraperContext ( service , userId , host , outputDir , maxPosts , downloadBars , proxyManager ) ;
1407+ const ctx = createScraperContext ( service , userId , host , outputDir , maxPosts , maxConcurrentDownloads , downloadBars , proxyManager ) ;
13851408
13861409 try {
13871410 await scrapeCreator ( ctx ) ;
@@ -1407,9 +1430,10 @@ async function scrapeCreator(ctx: ScraperContext): Promise<void> {
14071430 const host = argv . host as HostType ;
14081431 const outputDir = argv . outputDir ;
14091432 const maxPosts = argv . maxPosts ;
1433+ const maxConcurrentDownloads = argv . maxConcurrentDownloads ?? 2 ;
14101434 const proxyManager = null ;
14111435
1412- const ctx = createScraperContext ( service , userId , host , outputDir , maxPosts , downloadBars , proxyManager ) ;
1436+ const ctx = createScraperContext ( service , userId , host , outputDir , maxPosts , maxConcurrentDownloads , downloadBars , proxyManager ) ;
14131437 await scrapeCreator ( ctx ) ;
14141438 }
14151439
0 commit comments