From dca1ffd4d3d03aadbb5e19e409c801409c127405 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?=
Date: Tue, 6 Jan 2026 14:56:44 +0100
Subject: [PATCH 1/6] fix: await multiple `BasicCrawler.stop()` calls correctly

---
 .../src/internals/basic-crawler.ts | 23 +++++++++++--------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts
index a9eeb1461fda..76ef822b7f0f 100644
--- a/packages/basic-crawler/src/internals/basic-crawler.ts
+++ b/packages/basic-crawler/src/internals/basic-crawler.ts
@@ -561,6 +561,7 @@ export class BasicCrawler
+    private stoppingPromise?: Promise<void>;
     private _closeEvents?: boolean;
     private shouldLogMaxProcessedRequestsExceeded = true;
     private shouldLogMaxEnqueuedRequestsExceeded = true;
@@ -978,6 +979,7 @@ export class BasicCrawler
@@ ... @@
     stop(message = 'The crawler has been gracefully stopped.'): void {
-        // Gracefully starve the this.autoscaledPool, so it doesn't start new tasks. Resolves once the pool is cleared.
-        this.autoscaledPool
-            ?.pause()
-            // Resolves the `autoscaledPool.run()` promise in the `BasicCrawler.run()` method. Since the pool is already paused, it resolves immediately and doesn't kill any tasks.
-            .then(async () => this.autoscaledPool?.abort())
-            .then(() => this.log.info(message))
-            .catch((err) => {
-                this.log.error('An error occurred when stopping the crawler:', err);
-            });
+        if (!this.stoppingPromise) {
+            // Gracefully starve the this.autoscaledPool, so it doesn't start new tasks. Resolves once the pool is cleared.
+            this.stoppingPromise = this.autoscaledPool
+                ?.pause()
+                // Resolves the `autoscaledPool.run()` promise in the `BasicCrawler.run()` method. Since the pool is already paused, it resolves immediately and doesn't kill any tasks.
+                .then(async () => this.autoscaledPool?.abort())
+                .then(() => this.log.info(message))
+                .catch((err) => {
+                    this.log.error('An error occurred when stopping the crawler:', err);
+                });
+        }
     }

     async getRequestQueue(): Promise {

From 8496c2db764cbf17f4c10b438fbaaf6f4aefda1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?=
Date: Tue, 6 Jan 2026 15:03:08 +0100
Subject: [PATCH 2/6] chore: fix linter errors

---
 packages/basic-crawler/src/internals/basic-crawler.ts | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts
index 76ef822b7f0f..7ca6a55eb31b 100644
--- a/packages/basic-crawler/src/internals/basic-crawler.ts
+++ b/packages/basic-crawler/src/internals/basic-crawler.ts
@@ -979,7 +979,7 @@ export class BasicCrawler

From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?=
Date: Wed, 7 Jan 2026 08:35:52 +0100
Subject: [PATCH 3/6] chore: use Python implementation for `.stop()`

---
 .../src/internals/basic-crawler.ts | 35 ++++++++++---------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts
index 7ca6a55eb31b..f0774a79bbcd 100644
--- a/packages/basic-crawler/src/internals/basic-crawler.ts
+++ b/packages/basic-crawler/src/internals/basic-crawler.ts
@@ -536,6 +536,7 @@ export class BasicCrawler
@@ -561,7 +562,6 @@ export class BasicCrawler
-    private stoppingPromise?: Promise<void>;
     private _closeEvents?: boolean;
     private shouldLogMaxProcessedRequestsExceeded = true;
     private shouldLogMaxEnqueuedRequestsExceeded = true;
@@ -821,6 +821,14 @@ export class BasicCrawler
@@ -833,6 +841,11 @@ export class BasicCrawler
@@ ... @@
-    stop(message = 'The crawler has been gracefully stopped.'): void {
-        if (!this.stoppingPromise) {
-            // Gracefully starve the this.autoscaledPool, so it doesn't start new tasks. Resolves once the pool is cleared.
-            this.stoppingPromise = this.autoscaledPool
-                ?.pause()
-                // Resolves the `autoscaledPool.run()` promise in the `BasicCrawler.run()` method. Since the pool is already paused, it resolves immediately and doesn't kill any tasks.
-                .then(async () => this.autoscaledPool?.abort())
-                .then(() => this.log.info(message))
-                .catch((err) => {
-                    this.log.error('An error occurred when stopping the crawler:', err);
-                });
-        }
+    stop(reason = 'The crawler has been gracefully stopped.'): void {
+        this.log.info(reason);
+        this.unexpectedStop = true;
     }

     async getRequestQueue(): Promise {

From e9d152ca2ba50145131223ddc2eadf4c27f5ddae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?=
Date: Wed, 7 Jan 2026 08:57:36 +0100
Subject: [PATCH 4/6] chore: improve logging for `.stop()` method

---
 .../src/internals/basic-crawler.ts | 20 ++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts
index f0774a79bbcd..d355a2c7207e 100644
--- a/packages/basic-crawler/src/internals/basic-crawler.ts
+++ b/packages/basic-crawler/src/internals/basic-crawler.ts
@@ -565,6 +565,7 @@ export class BasicCrawler
     private _experimentWarnings: Partial> = {};
@@ -822,10 +823,13 @@ export class BasicCrawler

From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?=
Date: Thu, 8 Jan 2026 10:42:57 +0100
Subject: [PATCH 5/6] chore: consolidate termination message flags

---
 packages/basic-crawler/src/internals/basic-crawler.ts | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts
index d355a2c7207e..e1dd1e6469a4 100644
--- a/packages/basic-crawler/src/internals/basic-crawler.ts
+++ b/packages/basic-crawler/src/internals/basic-crawler.ts
@@ -563,9 +563,8 @@ export class BasicCrawler
     private _experimentWarnings: Partial> = {};
@@ -812,23 +811,23 @@ export class BasicCrawler
                 if (isMaxPagesExceeded()) {
-                    if (this.shouldLogMaxProcessedRequestsExceeded) {
+                    if (this.shouldLogShuttingDown) {
                         log.info(
                             'Crawler reached the maxRequestsPerCrawl limit of ' +
                                 `${this.maxRequestsPerCrawl} requests and will shut down soon. Requests that are in progress will be allowed to finish.`,
                         );
-                        this.shouldLogMaxProcessedRequestsExceeded = false;
+                        this.shouldLogShuttingDown = false;
                     }
                     return false;
                 }

                 if (this.unexpectedStop) {
-                    if (this.shouldLogUnexpectedStop) {
+                    if (this.shouldLogShuttingDown) {
                         this.log.info(
                             'No new requests are allowed because the `stop()` method has been called. ' +
                                 'Ongoing requests will be allowed to complete.',
                         );
-                        this.shouldLogUnexpectedStop = false;
+                        this.shouldLogShuttingDown = false;
                     }
                     return false;
                 }
@@ -999,9 +998,8 @@ export class BasicCrawler

From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?=
Date: Thu, 8 Jan 2026 11:58:14 +0100
Subject: [PATCH 6/6] chore: set the `shouldLogShuttingDown` on all termination paths

---
 packages/basic-crawler/src/internals/basic-crawler.ts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts
index e1dd1e6469a4..4f60768fede0 100644
--- a/packages/basic-crawler/src/internals/basic-crawler.ts
+++ b/packages/basic-crawler/src/internals/basic-crawler.ts
@@ -841,6 +841,7 @@ export class BasicCrawler
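
Usage sketch (not part of the patches above): a minimal example of how the reworked `stop()` is expected to be used once this series lands, assuming the patched behaviour where `stop()` only logs the reason and raises a flag, lets in-flight requests finish, and then lets `run()` resolve. The URLs and the stop condition below are placeholders.

    import { BasicCrawler } from 'crawlee';

    const crawler = new BasicCrawler({
        async requestHandler({ request, log }) {
            log.info(`Processing ${request.url}`);
            // Hypothetical condition: ask the crawler to stop once a marker URL is reached.
            if (request.url.includes('stop-here')) {
                // After this series, stop() sets a flag checked by the autoscaled pool;
                // no new requests are started and ongoing ones are allowed to complete.
                crawler.stop('Found the stop marker.');
            }
        },
    });

    await crawler.run(['https://example.com/', 'https://example.com/stop-here']);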