@@ -218,6 +218,7 @@ const PAGE_SIZE = 50;
218218const MAX_CONCURRENT_DOWNLOADS = 2 ;
219219const MAX_DOWNLOAD_RETRIES = 3 ;
220220const DOWNLOAD_RETRY_WAIT_SECONDS = 10000 ; // NOTE(review): name says seconds, but 10000 looks like a millisecond value — confirm against the call site
221+ const REQUEST_TIMEOUT_MS = 120000 ; // 2 minutes: used both as the axios `timeout` and as the AbortController timer, so a request that never responds is aborted
221222const MAX_FAILURES_BEFORE_BLACKLIST = 5 ;
222223const STREAM_TIMEOUT_MS = 300000 ; // 5 minute timeout for stream operations (prevents hangs)
223224const BLACKLIST_EXPIRY_MS = 2 * 24 * 60 * 60 * 1000 ; // 2 days in milliseconds
@@ -677,25 +678,37 @@ async function downloadFile(ctx: ScraperContext, downloadQueueEntry: DownloadQue
677678 proxySelection = proxyResult . selection ;
678679 proxyOptions = proxyResult . options ;
679680
680- const response = await axios . get ( downloadUrl , {
681- responseType : 'stream' ,
682- timeout : 120000 , // Increased timeout to 2 minutes for large files
683- ...proxyOptions ,
684- onDownloadProgress : progressEvent => {
685- if ( progressEvent . total ) {
686- // can calculate percentage
687- safeUpdateTask ( ctx , taskId , {
688- percentage : progressEvent . loaded / progressEvent . total ,
689- message : chalk . blue ( `Downloading` ) ,
690- } ) ;
691- } else {
692- safeUpdateTask ( ctx , taskId , {
693- percentage : progressEvent . loaded ? 100 : 0 ,
694- message : chalk . blue ( `Downloading` ) ,
695- } ) ;
696- }
697- } ,
698- } ) ;
681+ // Separate controller to abort requests that never start streaming
682+ const requestController = new AbortController ( ) ;
683+ const requestTimeout = setTimeout ( ( ) => {
684+ requestController . abort ( ) ;
685+ } , REQUEST_TIMEOUT_MS ) ;
686+
687+ let response ;
688+ try {
689+ response = await axios . get ( downloadUrl , {
690+ responseType : 'stream' ,
691+ timeout : REQUEST_TIMEOUT_MS , // request-level timeout
692+ signal : requestController . signal ,
693+ ...proxyOptions ,
694+ onDownloadProgress : progressEvent => {
695+ if ( progressEvent . total ) {
696+ // can calculate percentage
697+ safeUpdateTask ( ctx , taskId , {
698+ percentage : progressEvent . loaded / progressEvent . total ,
699+ message : chalk . blue ( `Downloading` ) ,
700+ } ) ;
701+ } else {
702+ safeUpdateTask ( ctx , taskId , {
703+ percentage : progressEvent . loaded ? 100 : 0 ,
704+ message : chalk . blue ( `Downloading` ) ,
705+ } ) ;
706+ }
707+ } ,
708+ } ) ;
709+ } finally {
710+ clearTimeout ( requestTimeout ) ;
711+ }
699712 recordProxyOutcome ( ctx , proxySelection ) ;
700713
701714 // Check for 500 errors before processing the stream
@@ -913,12 +926,13 @@ async function downloadFile(ctx: ScraperContext, downloadQueueEntry: DownloadQue
913926 if ( isAxiosError ( error ) ) {
914927 const status = error . response ?. status ;
915928 const errorCode = ( error as any ) . code ;
929+ const isAbort = errorCode === 'ERR_CANCELED' || ( error as any ) . name === 'CanceledError' || ( error as any ) . name === 'AbortError' ;
916930
917931 // Handle connection errors (ECONNRESET, ECONNABORTED, ETIMEDOUT) and aborted/cancelled requests
918- if ( errorCode === 'ECONNRESET' || errorCode === 'ECONNABORTED' || errorCode === 'ETIMEDOUT' ) {
932+ if ( errorCode === 'ECONNRESET' || errorCode === 'ECONNABORTED' || errorCode === 'ETIMEDOUT' || isAbort ) {
919933 await recordFailure ( ctx , filePath , fileName ) ;
920934 safeUpdateTask ( ctx , taskId , {
921- message : `Connection error (${ errorCode } ). ${ retries < MAX_DOWNLOAD_RETRIES ? 'Retrying...' : 'Failed' } ` ,
935+ message : `Connection error (${ errorCode || 'aborted' } ). ${ retries < MAX_DOWNLOAD_RETRIES ? 'Retrying...' : 'Failed' } ` ,
922936 barTransformFn : chalk . red ,
923937 } ) ;
924938
@@ -1050,8 +1064,15 @@ async function downloadFiles(ctx: ScraperContext, downloadQueue: DownloadQueueEn
10501064}
10511065
10521066const MAX_RETRY_PASSES = 3 ;
1067+ let globalTaskCounter = 0 ;
1068+
1069+ interface DownloadAllResult { // Summary returned by downloadAllWithRetries
1070+ completed : number ; // value of the running completedTotal counter when the retry passes end
1071+ failed : number ; // entries still left in the queue after the last pass (these are added to the blacklist)
1072+ total : number ; // length of the download queue originally passed in
1073+ }
10531074
1054- async function downloadAllWithRetries ( ctx : ScraperContext , downloadQueue : DownloadQueueEntry [ ] ) : Promise < void > {
1075+ async function downloadAllWithRetries ( ctx : ScraperContext , downloadQueue : DownloadQueueEntry [ ] ) : Promise < DownloadAllResult > {
10551076 const totalFiles = downloadQueue . length ;
10561077 let currentQueue = downloadQueue ;
10571078 let completedTotal = 0 ;
@@ -1064,6 +1085,12 @@ async function downloadAllWithRetries(ctx: ScraperContext, downloadQueue: Downlo
10641085 console . log ( chalk . yellow ( `\nRetry pass ${ passNumber - 1 } : Retrying ${ currentQueue . length } failed download(s)...` ) ) ;
10651086 // Wait a bit before retry pass to let the server recover
10661087 await new Promise ( resolve => setTimeout ( resolve , 5000 ) ) ;
1088+
1089+ // Assign new unique task IDs for retry pass to avoid conflicts with removed tasks
1090+ currentQueue = currentQueue . map ( entry => ( {
1091+ ...entry ,
1092+ taskId : `dl-retry-${ passNumber } -${ ++ globalTaskCounter } ` ,
1093+ } ) ) ;
10671094 }
10681095
10691096 const result = await downloadFiles ( ctx , currentQueue , totalFiles , completedTotal ) ;
@@ -1078,13 +1105,16 @@ async function downloadAllWithRetries(ctx: ScraperContext, downloadQueue: Downlo
10781105 }
10791106
10801107 // After all retries, blacklist any remaining failed items
1081- if ( currentQueue . length > 0 ) {
1082- console . log ( chalk . red ( `\n${ currentQueue . length } download(s) failed after ${ MAX_RETRY_PASSES } passes, adding to blacklist:` ) ) ;
1108+ const finalFailed = currentQueue . length ;
1109+ if ( finalFailed > 0 ) {
1110+ console . log ( chalk . red ( `\n${ finalFailed } download(s) failed after ${ MAX_RETRY_PASSES } passes, adding to blacklist:` ) ) ;
10831111 for ( const task of currentQueue ) {
10841112 console . log ( chalk . red ( ` - ${ task . fileName } ` ) ) ;
10851113 await addToBlacklist ( ctx , task . filePath , task . fileName ) ;
10861114 }
10871115 }
1116+
1117+ return { completed : completedTotal , failed : finalFailed , total : totalFiles } ;
10881118}
10891119
10901120function sanitizeFileName ( fileName : string ) : string {
@@ -1269,11 +1299,20 @@ async function scrapeCreator(ctx: ScraperContext): Promise<void> {
12691299 barTransformFn : chalk . yellow ,
12701300 message : 'Starting downloads...' ,
12711301 } ) ;
1272- await downloadAllWithRetries ( ctx , downloadQueue ) ;
1273- ctx . downloadBars . done ( ctx . overallProgressBarId , {
1274- message : 'All files downloaded.' ,
1275- barTransformFn : chalk . green ,
1276- } ) ;
1302+ const downloadResult = await downloadAllWithRetries ( ctx , downloadQueue ) ;
1303+
1304+ // Show appropriate completion message based on results
1305+ if ( downloadResult . failed === 0 ) {
1306+ ctx . downloadBars . done ( ctx . overallProgressBarId , {
1307+ message : 'All files downloaded.' ,
1308+ barTransformFn : chalk . green ,
1309+ } ) ;
1310+ } else {
1311+ ctx . downloadBars . done ( ctx . overallProgressBarId , {
1312+ message : `${ downloadResult . completed } /${ downloadResult . total } files downloaded. ${ downloadResult . failed } failed (blacklisted).` ,
1313+ barTransformFn : chalk . yellow ,
1314+ } ) ;
1315+ }
12771316
12781317 // Save blacklist one final time before exiting
12791318 await saveBlacklist ( ctx ) ;
0 commit comments