Skip to content

Commit 675dadf

Browse files
committed
Review comments
TODO: Figure out reason for stats difference in request_total_finished_duration
1 parent b2b4724 commit 675dadf

File tree

3 files changed

+23
-14
lines changed

3 files changed

+23
-14
lines changed

src/crawlee/_utils/recurring_task.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
if TYPE_CHECKING:
88
from collections.abc import Callable
99
from datetime import timedelta
10+
from types import TracebackType
11+
12+
from typing_extensions import Self
1013

1114
logger = getLogger(__name__)
1215

@@ -26,6 +29,18 @@ def __init__(self, func: Callable, delay: timedelta) -> None:
2629
self.delay = delay
2730
self.task: asyncio.Task | None = None
2831

32+
async def __aenter__(self) -> Self:
33+
self.start()
34+
return self
35+
36+
async def __aexit__(
37+
self,
38+
exc_type: type[BaseException] | None,
39+
exc_value: BaseException | None,
40+
exc_traceback: TracebackType | None,
41+
) -> None:
42+
await self.stop()
43+
2944
async def _wrapper(self) -> None:
3045
"""Continuously execute the provided function with the specified delay.
3146

src/crawlee/crawlers/_basic/_basic_crawler.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from asyncio import CancelledError
1212
from collections.abc import AsyncGenerator, Awaitable, Callable, Iterable, Sequence
1313
from contextlib import AsyncExitStack, suppress
14-
from datetime import datetime, timedelta, timezone
14+
from datetime import timedelta
1515
from functools import partial
1616
from pathlib import Path
1717
from typing import TYPE_CHECKING, Any, Generic, Literal, cast
@@ -740,20 +740,11 @@ async def _run_crawler(self) -> None:
740740
for context in contexts_to_enter:
741741
await exit_stack.enter_async_context(context) # type: ignore[arg-type]
742742

743-
self._crawler_state_rec_task.start()
744-
try:
743+
async with self._crawler_state_rec_task:
745744
await self._autoscaled_pool.run()
746-
finally:
747-
await self._crawler_state_rec_task.stop()
748745

749746
# Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed
750-
if not self.statistics.state.crawler_last_started_at:
751-
raise RuntimeError('Statistics.state.crawler_last_started_at not set.')
752-
run_duration = datetime.now(timezone.utc) - self.statistics.state.crawler_last_started_at
753-
self._statistics.state.crawler_runtime = self.statistics.state.crawler_runtime + run_duration
754-
self._service_locator.get_event_manager().emit(
755-
event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False)
756-
)
747+
event_manager.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False))
757748

758749
async def add_requests(
759750
self,

src/crawlee/statistics/_statistics.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def __init__(
9696

9797
self._state = RecoverableState(
9898
default_state=state_model(stats_id=self._id),
99-
persist_state_key=persist_state_key or f'SDK_CRAWLER_STATISTICS_{self._id}',
99+
persist_state_key=persist_state_key or f'__CRAWLER_STATISTICS_{self._id}',
100100
persistence_enabled=persistence_enabled,
101101
persist_state_kvs_name=persist_state_kvs_name,
102102
persist_state_kvs_factory=persist_state_kvs_factory,
@@ -187,7 +187,10 @@ async def __aexit__(
187187
if not self._active:
188188
raise RuntimeError(f'The {self.__class__.__name__} is not active.')
189189

190-
self._state.current_value.crawler_finished_at = datetime.now(timezone.utc)
190+
if not self.state.crawler_last_started_at:
191+
raise RuntimeError('Statistics.state.crawler_last_started_at not set.')
192+
self.state.crawler_finished_at = datetime.now(timezone.utc)
193+
self.state.crawler_runtime += self.state.crawler_finished_at - self.state.crawler_last_started_at
191194

192195
await self._state.teardown()
193196

0 commit comments

Comments
 (0)