Skip to content

Commit e2f957b

Browse files
committed
feat: enhance download functionality with request timeout and improved error handling
- Introduced a request timeout to abort HTTP requests that do not respond within a specified duration.
- Updated the download logic to handle aborted requests and provide clearer error messages.
- Modified the downloadAllWithRetries function to return a summary of completed and failed downloads.
- Improved progress tracking during file downloads with unique task IDs for retries.
1 parent 4ea0508 commit e2f957b

File tree

1 file changed

+68
-29
lines changed

1 file changed

+68
-29
lines changed

index.ts

Lines changed: 68 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ const PAGE_SIZE = 50;
218218
const MAX_CONCURRENT_DOWNLOADS = 2;
219219
const MAX_DOWNLOAD_RETRIES = 3;
220220
const DOWNLOAD_RETRY_WAIT_SECONDS = 10000;
221+
const REQUEST_TIMEOUT_MS = 120000; // Abort the HTTP request itself if it never responds
221222
const MAX_FAILURES_BEFORE_BLACKLIST = 5;
222223
const STREAM_TIMEOUT_MS = 300000; // 5 minute timeout for stream operations (prevents hangs)
223224
const BLACKLIST_EXPIRY_MS = 2 * 24 * 60 * 60 * 1000; // 2 days in milliseconds
@@ -677,25 +678,37 @@ async function downloadFile(ctx: ScraperContext, downloadQueueEntry: DownloadQue
677678
proxySelection = proxyResult.selection;
678679
proxyOptions = proxyResult.options;
679680

680-
const response = await axios.get(downloadUrl, {
681-
responseType: 'stream',
682-
timeout: 120000, // Increased timeout to 2 minutes for large files
683-
...proxyOptions,
684-
onDownloadProgress: progressEvent => {
685-
if (progressEvent.total) {
686-
// can calculate percentage
687-
safeUpdateTask(ctx, taskId, {
688-
percentage: progressEvent.loaded / progressEvent.total,
689-
message: chalk.blue(`Downloading`),
690-
});
691-
} else {
692-
safeUpdateTask(ctx, taskId, {
693-
percentage: progressEvent.loaded ? 100 : 0,
694-
message: chalk.blue(`Downloading`),
695-
});
696-
}
697-
},
698-
});
681+
// Separate controller to abort requests that never start streaming
682+
const requestController = new AbortController();
683+
const requestTimeout = setTimeout(() => {
684+
requestController.abort();
685+
}, REQUEST_TIMEOUT_MS);
686+
687+
let response;
688+
try {
689+
response = await axios.get(downloadUrl, {
690+
responseType: 'stream',
691+
timeout: REQUEST_TIMEOUT_MS, // request-level timeout
692+
signal: requestController.signal,
693+
...proxyOptions,
694+
onDownloadProgress: progressEvent => {
695+
if (progressEvent.total) {
696+
// can calculate percentage
697+
safeUpdateTask(ctx, taskId, {
698+
percentage: progressEvent.loaded / progressEvent.total,
699+
message: chalk.blue(`Downloading`),
700+
});
701+
} else {
702+
safeUpdateTask(ctx, taskId, {
703+
percentage: progressEvent.loaded ? 100 : 0,
704+
message: chalk.blue(`Downloading`),
705+
});
706+
}
707+
},
708+
});
709+
} finally {
710+
clearTimeout(requestTimeout);
711+
}
699712
recordProxyOutcome(ctx, proxySelection);
700713

701714
// Check for 500 errors before processing the stream
@@ -913,12 +926,13 @@ async function downloadFile(ctx: ScraperContext, downloadQueueEntry: DownloadQue
913926
if (isAxiosError(error)) {
914927
const status = error.response?.status;
915928
const errorCode = (error as any).code;
929+
const isAbort = errorCode === 'ERR_CANCELED' || (error as any).name === 'CanceledError' || (error as any).name === 'AbortError';
916930

917931
// Handle connection errors (ECONNRESET, ECONNABORTED, ETIMEDOUT) and aborted/cancelled requests (ERR_CANCELED, CanceledError, AbortError)
918-
if (errorCode === 'ECONNRESET' || errorCode === 'ECONNABORTED' || errorCode === 'ETIMEDOUT') {
932+
if (errorCode === 'ECONNRESET' || errorCode === 'ECONNABORTED' || errorCode === 'ETIMEDOUT' || isAbort) {
919933
await recordFailure(ctx, filePath, fileName);
920934
safeUpdateTask(ctx, taskId, {
921-
message: `Connection error (${errorCode}). ${retries < MAX_DOWNLOAD_RETRIES ? 'Retrying...' : 'Failed'}`,
935+
message: `Connection error (${errorCode || 'aborted'}). ${retries < MAX_DOWNLOAD_RETRIES ? 'Retrying...' : 'Failed'}`,
922936
barTransformFn: chalk.red,
923937
});
924938

@@ -1050,8 +1064,15 @@ async function downloadFiles(ctx: ScraperContext, downloadQueue: DownloadQueueEn
10501064
}
10511065

10521066
const MAX_RETRY_PASSES = 3;
1067+
let globalTaskCounter = 0;
1068+
1069+
interface DownloadAllResult {
1070+
completed: number;
1071+
failed: number;
1072+
total: number;
1073+
}
10531074

1054-
async function downloadAllWithRetries(ctx: ScraperContext, downloadQueue: DownloadQueueEntry[]): Promise<void> {
1075+
async function downloadAllWithRetries(ctx: ScraperContext, downloadQueue: DownloadQueueEntry[]): Promise<DownloadAllResult> {
10551076
const totalFiles = downloadQueue.length;
10561077
let currentQueue = downloadQueue;
10571078
let completedTotal = 0;
@@ -1064,6 +1085,12 @@ async function downloadAllWithRetries(ctx: ScraperContext, downloadQueue: Downlo
10641085
console.log(chalk.yellow(`\nRetry pass ${passNumber - 1}: Retrying ${currentQueue.length} failed download(s)...`));
10651086
// Wait a bit before retry pass to let the server recover
10661087
await new Promise(resolve => setTimeout(resolve, 5000));
1088+
1089+
// Assign new unique task IDs for retry pass to avoid conflicts with removed tasks
1090+
currentQueue = currentQueue.map(entry => ({
1091+
...entry,
1092+
taskId: `dl-retry-${passNumber}-${++globalTaskCounter}`,
1093+
}));
10671094
}
10681095

10691096
const result = await downloadFiles(ctx, currentQueue, totalFiles, completedTotal);
@@ -1078,13 +1105,16 @@ async function downloadAllWithRetries(ctx: ScraperContext, downloadQueue: Downlo
10781105
}
10791106

10801107
// After all retries, blacklist any remaining failed items
1081-
if (currentQueue.length > 0) {
1082-
console.log(chalk.red(`\n${currentQueue.length} download(s) failed after ${MAX_RETRY_PASSES} passes, adding to blacklist:`));
1108+
const finalFailed = currentQueue.length;
1109+
if (finalFailed > 0) {
1110+
console.log(chalk.red(`\n${finalFailed} download(s) failed after ${MAX_RETRY_PASSES} passes, adding to blacklist:`));
10831111
for (const task of currentQueue) {
10841112
console.log(chalk.red(` - ${task.fileName}`));
10851113
await addToBlacklist(ctx, task.filePath, task.fileName);
10861114
}
10871115
}
1116+
1117+
return { completed: completedTotal, failed: finalFailed, total: totalFiles };
10881118
}
10891119

10901120
function sanitizeFileName(fileName: string): string {
@@ -1269,11 +1299,20 @@ async function scrapeCreator(ctx: ScraperContext): Promise<void> {
12691299
barTransformFn: chalk.yellow,
12701300
message: 'Starting downloads...',
12711301
});
1272-
await downloadAllWithRetries(ctx, downloadQueue);
1273-
ctx.downloadBars.done(ctx.overallProgressBarId, {
1274-
message: 'All files downloaded.',
1275-
barTransformFn: chalk.green,
1276-
});
1302+
const downloadResult = await downloadAllWithRetries(ctx, downloadQueue);
1303+
1304+
// Show appropriate completion message based on results
1305+
if (downloadResult.failed === 0) {
1306+
ctx.downloadBars.done(ctx.overallProgressBarId, {
1307+
message: 'All files downloaded.',
1308+
barTransformFn: chalk.green,
1309+
});
1310+
} else {
1311+
ctx.downloadBars.done(ctx.overallProgressBarId, {
1312+
message: `${downloadResult.completed}/${downloadResult.total} files downloaded. ${downloadResult.failed} failed (blacklisted).`,
1313+
barTransformFn: chalk.yellow,
1314+
});
1315+
}
12771316

12781317
// Save blacklist one final time before exiting
12791318
await saveBlacklist(ctx);

0 commit comments

Comments
 (0)