From 7dd0f66a89e1e3ce3518e3aa3be52673fd8ef578 Mon Sep 17 00:00:00 2001
From: Josef Prochazka
Date: Thu, 2 Oct 2025 15:38:11 +0200
Subject: [PATCH 1/8] WIP, fix failing tests

---
 src/crawlee/crawlers/_basic/_basic_crawler.py | 15 ++++-
 src/crawlee/statistics/_statistics.py         |  5 +-
 .../crawlers/_basic/test_basic_crawler.py     | 56 +++++++++++++++++++
 tests/unit/test_configuration.py              |  8 +--
 4 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index cdf7e527e2..0a97e4ae19 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -11,7 +11,7 @@
 from asyncio import CancelledError
 from collections.abc import AsyncGenerator, Awaitable, Callable, Iterable, Sequence
 from contextlib import AsyncExitStack, suppress
-from datetime import timedelta
+from datetime import datetime, timedelta, timezone
 from functools import partial
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generic, Literal, cast
@@ -56,7 +56,7 @@
     SessionError,
     UserDefinedErrorHandlerError,
 )
-from crawlee.events._types import Event, EventCrawlerStatusData
+from crawlee.events._types import Event, EventCrawlerStatusData, EventPersistStateData
 from crawlee.http_clients import ImpitHttpClient
 from crawlee.router import Router
 from crawlee.sessions import SessionPool
@@ -440,6 +440,7 @@ def __init__(
         self._statistics = statistics or cast(
             'Statistics[TStatisticsState]',
             Statistics.with_default_state(
+                persistence_enabled=True,
                 periodic_message_logger=self._logger,
                 statistics_log_format=self._statistics_log_format,
                 log_message='Current request statistics:',
@@ -744,6 +745,16 @@ async def _run_crawler(self) -> None:
 
         await self._autoscaled_pool.run()
 
+        # Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed
+        if not self.statistics.state.crawler_last_started_at:
+            raise RuntimeError('Statistics.state.crawler_last_started_at not set.')
+        run_duration = datetime.now(timezone.utc) - self.statistics.state.crawler_last_started_at
+        self._statistics.state.crawler_runtime = self.statistics.state.crawler_runtime + run_duration
+        self._service_locator.get_event_manager().emit(
+            event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False)
+        )
+        await asyncio.sleep(10)
+
     async def add_requests(
         self,
         requests: Sequence[str | Request],

diff --git a/src/crawlee/statistics/_statistics.py b/src/crawlee/statistics/_statistics.py
index 2386986001..cc7e430348 100644
--- a/src/crawlee/statistics/_statistics.py
+++ b/src/crawlee/statistics/_statistics.py
@@ -250,8 +250,7 @@ def calculate(self) -> FinalStatistics:
         if self._instance_start is None:
             raise RuntimeError('The Statistics object is not initialized')
 
-        crawler_runtime = datetime.now(timezone.utc) - self._instance_start
-        total_minutes = crawler_runtime.total_seconds() / 60
+        total_minutes = self.state.crawler_runtime.total_seconds() / 60
         state = self._state.current_value
         serialized_state = state.model_dump(by_alias=False)
 
@@ -262,7 +261,7 @@
             requests_failed_per_minute=math.floor(state.requests_failed / total_minutes) if total_minutes else 0,
             request_total_duration=state.request_total_finished_duration + state.request_total_failed_duration,
             requests_total=state.requests_failed + state.requests_finished,
-            crawler_runtime=crawler_runtime,
+            crawler_runtime=state.crawler_runtime,
             requests_finished=state.requests_finished,
             requests_failed=state.requests_failed,
             retry_histogram=serialized_state['request_retry_histogram'],

diff --git a/tests/unit/crawlers/_basic/test_basic_crawler.py b/tests/unit/crawlers/_basic/test_basic_crawler.py
index 09b951ca35..d1a5f98488 100644
--- a/tests/unit/crawlers/_basic/test_basic_crawler.py
+++ b/tests/unit/crawlers/_basic/test_basic_crawler.py
@@ -2,6 +2,7 @@
 from __future__ import annotations
 
 import asyncio
+import concurrent
 import json
 import logging
 import os
@@ -1643,3 +1644,58 @@ async def handler(context: BasicCrawlingContext) -> None:
 
     # Crawler should not fall back to the default storage after the purge
     assert await unrelated_rq.fetch_next_request() == unrelated_request
+
+
+async def _run_crawler(requests: list[str], storage_dir: str) -> StatisticsState:
+    """Run crawler and return its statistics state.
+
+    Must be defined like this to be picklable for ProcessPoolExecutor."""
+    service_locator.set_configuration(
+        Configuration(
+            crawlee_storage_dir=storage_dir,  # type: ignore[call-arg]
+            purge_on_start=False,
+        )
+    )
+
+    async def request_handler(context: BasicCrawlingContext) -> None:
+        context.log.info(f'Processing {context.request.url} ...')
+
+    crawler = BasicCrawler(
+        request_handler=request_handler,
+        concurrency_settings=ConcurrencySettings(max_concurrency=1, desired_concurrency=1),
+    )
+
+    await crawler.run(requests)
+    return crawler.statistics.state
+
+
+def _process_run_crawler(requests: list[str], storage_dir: str) -> StatisticsState:
+    return asyncio.run(_run_crawler(requests=requests, storage_dir=storage_dir))
+
+
+async def test_crawler_statistics_persistence(tmp_path: Path) -> None:
+    """Test that crawler statistics persist and are loaded correctly.
+
+    This test simulates starting the crawler process twice, and checks that the statistics include the first run."""
+
+    with concurrent.futures.ProcessPoolExecutor() as executor:
+        # Crawl 2 requests in the first run and automatically persist the state.
+        first_run_state = executor.submit(
+            _process_run_crawler,
+            requests=['https://a.placeholder.com', 'https://b.placeholder.com'],
+            storage_dir=str(tmp_path),
+        ).result()
+        assert first_run_state.requests_finished == 2
+
+        # Crawl 1 additional request in the second run, but use the previously automatically persisted state.
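+        # The second run loads the state persisted by the first run from the same storage_dir,
+        # so the request counters continue where the first run ended.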
+        second_run_state = executor.submit(
+            _process_run_crawler, requests=['https://c.placeholder.com'], storage_dir=str(tmp_path)
+        ).result()
+        assert second_run_state.requests_finished == 3
+
+        assert first_run_state.crawler_started_at == second_run_state.crawler_started_at
+        assert first_run_state.crawler_finished_at
+        assert second_run_state.crawler_finished_at
+
+        assert first_run_state.crawler_finished_at < second_run_state.crawler_finished_at
+        assert first_run_state.crawler_runtime < second_run_state.crawler_runtime

diff --git a/tests/unit/test_configuration.py b/tests/unit/test_configuration.py
index f89401e5be..f1bb244170 100644
--- a/tests/unit/test_configuration.py
+++ b/tests/unit/test_configuration.py
@@ -41,10 +41,10 @@ async def test_storage_not_persisted_when_disabled(tmp_path: Path, server_url: URL) -> None:
     )
     storage_client = MemoryStorageClient()
 
-    crawler = HttpCrawler(
-        configuration=configuration,
-        storage_client=storage_client,
-    )
+    service_locator.set_configuration(configuration)
+    service_locator.set_storage_client(storage_client)
+
+    crawler = HttpCrawler()
 
     @crawler.router.default_handler
     async def default_handler(context: HttpCrawlingContext) -> None:

From f3b981284ce9dc123e71392cd0e5425a14227321 Mon Sep 17 00:00:00 2001
From: Josef Prochazka
Date: Thu, 2 Oct 2025 15:53:34 +0200
Subject: [PATCH 2/8] Add a comment to remember where I left off

---
 src/crawlee/crawlers/_basic/_basic_crawler.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index 0a97e4ae19..408125458b 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -440,7 +440,7 @@ def __init__(
         self._statistics = statistics or cast(
             'Statistics[TStatisticsState]',
             Statistics.with_default_state(
-                persistence_enabled=True,
+                persistence_enabled=True,  # TODO: Why does changing this to True break unrelated tests?
                 periodic_message_logger=self._logger,
                 statistics_log_format=self._statistics_log_format,
                 log_message='Current request statistics:',
@@ -753,7 +753,6 @@ async def _run_crawler(self) -> None:
         self._service_locator.get_event_manager().emit(
             event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False)
         )
-        await asyncio.sleep(10)
 
     async def add_requests(
         self,

From 3a28d42f046bff51a311b53a029422d85ad48d8a Mon Sep 17 00:00:00 2001
From: Josef Prochazka
Date: Wed, 15 Oct 2025 14:14:50 +0200
Subject: [PATCH 3/8] Start `_crawler_state_rec_task` only after context
 managers are entered (to allow persistent state init)

---
 src/crawlee/crawlers/_basic/_basic_crawler.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index 408125458b..65d7dfa887 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -440,7 +440,7 @@ def __init__(
         self._statistics = statistics or cast(
             'Statistics[TStatisticsState]',
             Statistics.with_default_state(
-                persistence_enabled=True,  # TODO: Why does changing this to True break unrelated tests?
+                persistence_enabled=True,
                 periodic_message_logger=self._logger,
                 statistics_log_format=self._statistics_log_format,
                 log_message='Current request statistics:',
@@ -722,8 +722,6 @@ def sigint_handler() -> None:
     async def _run_crawler(self) -> None:
         event_manager = self._service_locator.get_event_manager()
 
-        self._crawler_state_rec_task.start()
-
         # Collect the context managers to be entered. Context managers that are already active are excluded,
         # as they were likely entered by the caller, who will also be responsible for exiting them.
         contexts_to_enter = [
@@ -743,6 +741,7 @@
         for context in contexts_to_enter:
             await exit_stack.enter_async_context(context)  # type: ignore[arg-type]
 
+        self._crawler_state_rec_task.start()
         await self._autoscaled_pool.run()
 
         # Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed

From ebc350d81d12aab672ac08651d10e9daec0735e9 Mon Sep 17 00:00:00 2001
From: Josef Prochazka
Date: Wed, 15 Oct 2025 16:25:10 +0200
Subject: [PATCH 4/8] Test Windows-related issue

---
 src/crawlee/crawlers/_basic/_basic_crawler.py    | 6 ++++--
 tests/unit/crawlers/_basic/test_basic_crawler.py | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index 65d7dfa887..b3d2c46db8 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -690,7 +690,6 @@ def sigint_handler() -> None:
             except CancelledError:
                 pass
             finally:
-                await self._crawler_state_rec_task.stop()
                 if threading.current_thread() is threading.main_thread():
                     with suppress(NotImplementedError):
                         asyncio.get_running_loop().remove_signal_handler(signal.SIGINT)
@@ -742,7 +741,10 @@ async def _run_crawler(self) -> None:
             await exit_stack.enter_async_context(context)  # type: ignore[arg-type]
 
         self._crawler_state_rec_task.start()
-        await self._autoscaled_pool.run()
+        try:
+            await self._autoscaled_pool.run()
+        finally:
+            await self._crawler_state_rec_task.stop()
 
         # Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed

diff --git a/tests/unit/crawlers/_basic/test_basic_crawler.py b/tests/unit/crawlers/_basic/test_basic_crawler.py
index d1a5f98488..9d61ecc896 100644
--- a/tests/unit/crawlers/_basic/test_basic_crawler.py
+++ b/tests/unit/crawlers/_basic/test_basic_crawler.py
@@ -5,6 +5,7 @@
 import concurrent
 import json
 import logging
+import multiprocessing
 import os
 import sys
 import time
@@ -1678,7 +1679,7 @@ async def test_crawler_statistics_persistence(tmp_path: Path) -> None:
 
     This test simulates starting the crawler process twice, and checks that the statistics include the first run."""
 
-    with concurrent.futures.ProcessPoolExecutor() as executor:
+    with concurrent.futures.ProcessPoolExecutor(mp_context=multiprocessing.get_context('fork')) as executor:
         # Crawl 2 requests in the first run and automatically persist the state.
         first_run_state = executor.submit(
             _process_run_crawler,
             requests=['https://a.placeholder.com', 'https://b.placeholder.com'],
             storage_dir=str(tmp_path),
         ).result()
         assert first_run_state.requests_finished == 2

From 52daca0a69c31a185dd15feac97fd1e2fee370a2 Mon Sep 17 00:00:00 2001
From: Josef Prochazka
Date: Thu, 16 Oct 2025 11:26:08 +0200
Subject: [PATCH 5/8] Fix the test on Windows

---
 tests/unit/crawlers/_basic/test_basic_crawler.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/unit/crawlers/_basic/test_basic_crawler.py b/tests/unit/crawlers/_basic/test_basic_crawler.py
index 9d61ecc896..b2b75e50f7 100644
--- a/tests/unit/crawlers/_basic/test_basic_crawler.py
+++ b/tests/unit/crawlers/_basic/test_basic_crawler.py
@@ -5,7 +5,6 @@
 import concurrent
 import json
 import logging
-import multiprocessing
 import os
 import sys
 import time
@@ -1679,7 +1678,7 @@ async def test_crawler_statistics_persistence(tmp_path: Path) -> None:
 
     This test simulates starting the crawler process twice, and checks that the statistics include the first run."""
 
-    with concurrent.futures.ProcessPoolExecutor(mp_context=multiprocessing.get_context('fork')) as executor:
+    with concurrent.futures.ProcessPoolExecutor() as executor:
         # Crawl 2 requests in the first run and automatically persist the state.
         first_run_state = executor.submit(
             _process_run_crawler,
             requests=['https://a.placeholder.com', 'https://b.placeholder.com'],
             storage_dir=str(tmp_path),
         ).result()
         assert first_run_state.requests_finished == 2
 
+    # Do not reuse the executor, to simulate a fresh process and avoid modified class attributes.
+    with concurrent.futures.ProcessPoolExecutor() as executor:
         # Crawl 1 additional request in the second run, but use the previously automatically persisted state.
         second_run_state = executor.submit(
             _process_run_crawler, requests=['https://c.placeholder.com'], storage_dir=str(tmp_path)
         ).result()

From 675dadf19297aa667658f4692dd7e6811cc677b5 Mon Sep 17 00:00:00 2001
From: Josef Prochazka
Date: Fri, 17 Oct 2025 16:54:24 +0200
Subject: [PATCH 6/8] Address review comments

TODO: Figure out the reason for the stats difference in
request_total_finished_duration
---
 src/crawlee/_utils/recurring_task.py          | 15 +++++++++++++++
 src/crawlee/crawlers/_basic/_basic_crawler.py | 15 +++------------
 src/crawlee/statistics/_statistics.py         |  7 +++++--
 3 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/src/crawlee/_utils/recurring_task.py b/src/crawlee/_utils/recurring_task.py
index 8ffc71d144..d0c20249e9 100644
--- a/src/crawlee/_utils/recurring_task.py
+++ b/src/crawlee/_utils/recurring_task.py
@@ -7,6 +7,9 @@
 if TYPE_CHECKING:
     from collections.abc import Callable
     from datetime import timedelta
+    from types import TracebackType
+
+    from typing_extensions import Self
 
 logger = getLogger(__name__)
@@ -26,6 +29,18 @@ def __init__(self, func: Callable, delay: timedelta) -> None:
         self.delay = delay
         self.task: asyncio.Task | None = None
 
+    async def __aenter__(self) -> Self:
+        self.start()
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        exc_traceback: TracebackType | None,
+    ) -> None:
+        await self.stop()
+
     async def _wrapper(self) -> None:
         """Continuously execute the provided function with the specified delay.

diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index b3d2c46db8..92c32c7e04 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -11,7 +11,7 @@
 from asyncio import CancelledError
 from collections.abc import AsyncGenerator, Awaitable, Callable, Iterable, Sequence
 from contextlib import AsyncExitStack, suppress
-from datetime import datetime, timedelta, timezone
+from datetime import timedelta
 from functools import partial
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generic, Literal, cast
@@ -740,20 +740,11 @@ async def _run_crawler(self) -> None:
         for context in contexts_to_enter:
             await exit_stack.enter_async_context(context)  # type: ignore[arg-type]
 
-        self._crawler_state_rec_task.start()
-        try:
+        async with self._crawler_state_rec_task:
             await self._autoscaled_pool.run()
-        finally:
-            await self._crawler_state_rec_task.stop()
 
         # Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed
-        if not self.statistics.state.crawler_last_started_at:
-            raise RuntimeError('Statistics.state.crawler_last_started_at not set.')
-        run_duration = datetime.now(timezone.utc) - self.statistics.state.crawler_last_started_at
-        self._statistics.state.crawler_runtime = self.statistics.state.crawler_runtime + run_duration
-        self._service_locator.get_event_manager().emit(
-            event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False)
-        )
+        event_manager.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False))
 
     async def add_requests(
         self,

diff --git a/src/crawlee/statistics/_statistics.py b/src/crawlee/statistics/_statistics.py
index 687a6b824d..f55d00a782 100644
--- a/src/crawlee/statistics/_statistics.py
+++ b/src/crawlee/statistics/_statistics.py
@@ -96,7 +96,7 @@ def __init__(
 
         self._state = RecoverableState(
             default_state=state_model(stats_id=self._id),
-            persist_state_key=persist_state_key or f'SDK_CRAWLER_STATISTICS_{self._id}',
+            persist_state_key=persist_state_key or f'__CRAWLER_STATISTICS_{self._id}',
             persistence_enabled=persistence_enabled,
             persist_state_kvs_name=persist_state_kvs_name,
             persist_state_kvs_factory=persist_state_kvs_factory,
@@ -187,7 +187,10 @@
         if not self._active:
             raise RuntimeError(f'The {self.__class__.__name__} is not active.')
 
-        self._state.current_value.crawler_finished_at = datetime.now(timezone.utc)
+        if not self.state.crawler_last_started_at:
+            raise RuntimeError('Statistics.state.crawler_last_started_at not set.')
+        self.state.crawler_finished_at = datetime.now(timezone.utc)
+        self.state.crawler_runtime += self.state.crawler_finished_at - self.state.crawler_last_started_at
 
         await self._state.teardown()

From 37f64732caab04afd0a978ff9e891573863dc167 Mon Sep 17 00:00:00 2001
From: Josef Prochazka
Date: Mon, 20 Oct 2025 11:05:54 +0200
Subject: [PATCH 7/8] Add `_crawler_state_rec_task` to the other context
 managers

---
 src/crawlee/crawlers/_basic/_basic_crawler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index 92c32c7e04..36402c626c 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -731,6 +731,7 @@ async def _run_crawler(self) -> None:
             self._statistics,
             self._session_pool if self._use_session_pool else None,
             self._http_client,
+            self._crawler_state_rec_task,
             *self._additional_context_managers,
         )
         if cm and getattr(cm, 'active', False) is False
@@ -740,8 +741,7 @@
         for context in contexts_to_enter:
             await exit_stack.enter_async_context(context)  # type: ignore[arg-type]
 
-        async with self._crawler_state_rec_task:
-            await self._autoscaled_pool.run()
+        await self._autoscaled_pool.run()
 
         # Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed
         event_manager.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False))

From 00e65ec39486ae9fdbe256deebaf760f80cf946c Mon Sep 17 00:00:00 2001
From: Josef Prochazka
Date: Thu, 23 Oct 2025 11:08:35 +0200
Subject: [PATCH 8/8] Persist crawler statistics to the crawler's KVS

---
 src/crawlee/crawlers/_basic/_basic_crawler.py | 26 +++++++-----
 src/crawlee/statistics/_statistics.py         |  2 +
 tests/unit/test_configuration.py              | 40 +++++++++++++++----
 3 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index 36402c626c..28544ad607 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -437,15 +437,23 @@ def __init__(
         self._statistics_log_format = statistics_log_format
 
         # Statistics
-        self._statistics = statistics or cast(
-            'Statistics[TStatisticsState]',
-            Statistics.with_default_state(
-                persistence_enabled=True,
-                periodic_message_logger=self._logger,
-                statistics_log_format=self._statistics_log_format,
-                log_message='Current request statistics:',
-            ),
-        )
+        if statistics:
+            self._statistics = statistics
+        else:
+
+            async def persist_state_factory() -> KeyValueStore:
+                return await self.get_key_value_store()
+
+            self._statistics = cast(
+                'Statistics[TStatisticsState]',
+                Statistics.with_default_state(
+                    persistence_enabled=True,
+                    periodic_message_logger=self._logger,
+                    statistics_log_format=self._statistics_log_format,
+                    log_message='Current request statistics:',
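+                    # Persist the statistics into the crawler's own key-value store via the factory above.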
+                    persist_state_kvs_factory=persist_state_factory,
+                ),
+            )
 
         # Additional context managers to enter and exit
         self._additional_context_managers = _additional_context_managers or []

diff --git a/src/crawlee/statistics/_statistics.py b/src/crawlee/statistics/_statistics.py
index f55d00a782..68b4ff6551 100644
--- a/src/crawlee/statistics/_statistics.py
+++ b/src/crawlee/statistics/_statistics.py
@@ -130,6 +130,7 @@ def with_default_state(
         persistence_enabled: bool = False,
         persist_state_kvs_name: str | None = None,
         persist_state_key: str | None = None,
+        persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
         log_message: str = 'Statistics',
        periodic_message_logger: Logger | None = None,
         log_interval: timedelta = timedelta(minutes=1),
@@ -141,6 +142,7 @@
             persistence_enabled=persistence_enabled,
             persist_state_kvs_name=persist_state_kvs_name,
             persist_state_key=persist_state_key,
+            persist_state_kvs_factory=persist_state_kvs_factory,
             log_message=log_message,
             periodic_message_logger=periodic_message_logger,
             log_interval=log_interval,

diff --git a/tests/unit/test_configuration.py b/tests/unit/test_configuration.py
index f1bb244170..4be4309247 100644
--- a/tests/unit/test_configuration.py
+++ b/tests/unit/test_configuration.py
@@ -8,6 +8,7 @@
 from crawlee import service_locator
 from crawlee.configuration import Configuration
 from crawlee.crawlers import HttpCrawler, HttpCrawlingContext
+from crawlee.statistics import Statistics
 from crawlee.storage_clients import MemoryStorageClient
 from crawlee.storage_clients._file_system._storage_client import FileSystemStorageClient
 
@@ -35,16 +36,41 @@ def test_global_configuration_works_reversed() -> None:
     )
 
 
-async def test_storage_not_persisted_when_disabled(tmp_path: Path, server_url: URL) -> None:
+async def test_storage_not_persisted_when_non_persistable_storage_used(tmp_path: Path, server_url: URL) -> None:
+    """Make the crawler use MemoryStorageClient, which can't persist state."""
+    service_locator.set_configuration(
+        Configuration(
+            crawlee_storage_dir=str(tmp_path),  # type: ignore[call-arg]
+        )
+    )
+    crawler = HttpCrawler(storage_client=MemoryStorageClient())
+
+    @crawler.router.default_handler
+    async def default_handler(context: HttpCrawlingContext) -> None:
+        await context.push_data({'url': context.request.url})
+
+    await crawler.run([str(server_url)])
+
+    # Verify that no files were created in the storage directory.
+    content = list(tmp_path.iterdir())
+    assert content == [], 'Expected the storage directory to be empty, but it is not.'
+
+
+async def test_storage_persisted_with_explicit_statistics_with_persistable_storage(
+    tmp_path: Path, server_url: URL
+) -> None:
+    """Make the crawler use MemoryStorageClient, which can't persist state, but pass it explicit
+    statistics that use the global FileSystemStorageClient(), which can persist state."""
+
     configuration = Configuration(
         crawlee_storage_dir=str(tmp_path),  # type: ignore[call-arg]
     )
-    storage_client = MemoryStorageClient()
-    service_locator.set_configuration(configuration)
-    service_locator.set_storage_client(storage_client)
+    service_locator.set_configuration(configuration)
+    service_locator.set_storage_client(FileSystemStorageClient())
 
-    crawler = HttpCrawler()
+    crawler = HttpCrawler(
+        storage_client=MemoryStorageClient(), statistics=Statistics.with_default_state(persistence_enabled=True)
+    )
 
     @crawler.router.default_handler
     async def default_handler(context: HttpCrawlingContext) -> None:
@@ -52,9 +78,9 @@ async def default_handler(context: HttpCrawlingContext) -> None:
 
     await crawler.run([str(server_url)])
 
-    # Verify that no files were created in the storage directory.
+    # Verify that files were created in the storage directory.
     content = list(tmp_path.iterdir())
-    assert content == [], 'Expected the storage directory to be empty, but it is not.'
+    assert content != [], 'Expected the storage directory to contain files, but it does not.'
 
 
 async def test_storage_persisted_when_enabled(tmp_path: Path, server_url: URL) -> None:
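---

For reviewers: a minimal sketch of the behavior this series enables, outside the test
harness. It assumes the public crawlee API used in the tests above (the URL is
illustrative); with purge_on_start disabled, running the same script twice resumes the
persisted statistics rather than starting from zero.

    import asyncio

    from crawlee import service_locator
    from crawlee.configuration import Configuration
    from crawlee.crawlers import BasicCrawler, BasicCrawlingContext

    async def main() -> None:
        # Keep previously persisted storage so the statistics of earlier runs are loaded.
        service_locator.set_configuration(Configuration(purge_on_start=False))

        crawler = BasicCrawler()

        @crawler.router.default_handler
        async def handler(context: BasicCrawlingContext) -> None:
            context.log.info(f'Processing {context.request.url} ...')

        await crawler.run(['https://a.placeholder.com'])
        # After PATCH 8/8, the statistics live in the crawler's key-value store,
        # so requests_finished accumulates across runs of this script.
        print(crawler.statistics.state.requests_finished)

    asyncio.run(main())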