Skip to content

Commit 902a822

Browse files
authored
docs: Use and enforce imperative mood (#1100)
- Currently, our docstrings use an inconsistent mix of imperative and indicative moods. - PEP 257 recommends using the imperative mood for the first line of docstrings, which is also enforced by the Ruff rule [D401](https://docs.astral.sh/ruff/rules/non-imperative-mood/#non-imperative-mood-d401). So I believe the imperative form is more suitable for documentation purposes. - In most cases, I have simply changed the verb form, but a few cases required more rewording. - I updated the pyproject configuration to enforce it.
1 parent 7fb36b5 commit 902a822

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+233
-172
lines changed

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ ignore = [
131131
"D100", # Missing docstring in public module
132132
"D104", # Missing docstring in public package
133133
"D107", # Missing docstring in `__init__`
134+
"D203", # One blank line required before class docstring
135+
"D213", # Multi-line docstring summary should start at the second line
136+
"D413", # Missing blank line after last section
134137
"EM", # flake8-errmsg
135138
"G004", # Logging statement uses f-string
136139
"ISC001", # This rule may cause conflicts when used with the formatter
@@ -188,9 +191,6 @@ runtime-evaluated-base-classes = [
188191
[tool.ruff.lint.flake8-builtins]
189192
builtins-ignorelist = ["id"]
190193

191-
[tool.ruff.lint.pydocstyle]
192-
convention = "google"
193-
194194
[tool.ruff.lint.isort]
195195
known-first-party = ["crawlee"]
196196

src/crawlee/_autoscaling/autoscaled_pool.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def __init__(
7070
is_task_ready_function: Callable[[], Awaitable[bool]],
7171
is_finished_function: Callable[[], Awaitable[bool]],
7272
) -> None:
73-
"""A default constructor.
73+
"""Initialize a new instance.
7474
7575
Args:
7676
system_status: Provides data about system utilization (load).
@@ -211,7 +211,7 @@ def _log_system_status(self) -> None:
211211
)
212212

213213
async def _worker_task_orchestrator(self, run: _AutoscaledPoolRun) -> None:
214-
"""Launches worker tasks whenever there is free capacity and a task is ready.
214+
"""Launch worker tasks whenever there is free capacity and a task is ready.
215215
216216
Exits when `is_finished_function` returns True.
217217
"""
@@ -260,11 +260,11 @@ async def _worker_task_orchestrator(self, run: _AutoscaledPoolRun) -> None:
260260
run.result.set_result(object())
261261

262262
def _reap_worker_task(self, task: asyncio.Task, run: _AutoscaledPoolRun) -> None:
263-
"""A callback for finished worker tasks.
263+
"""Handle cleanup and tracking of a completed worker task.
264264
265-
- It interrupts the run in case of an exception,
266-
- keeps track of tasks in progress,
267-
- notifies the orchestrator
265+
- Interrupt the run if the task encountered an exception.
266+
- Update the list of tasks in progress.
267+
- Notify the orchestrator about the task completion.
268268
"""
269269
run.worker_tasks_updated.set()
270270
run.worker_tasks.remove(task)

src/crawlee/_autoscaling/snapshotter.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def __init__(
6565
max_client_errors: int,
6666
max_memory_size: ByteSize,
6767
) -> None:
68-
"""A default constructor.
68+
"""Initialize a new instance.
6969
7070
In most cases, you should use the `from_config` constructor to create a new instance based on
7171
the provided configuration.
@@ -102,7 +102,7 @@ def __init__(
102102

103103
@classmethod
104104
def from_config(cls, config: Configuration | None = None) -> Snapshotter:
105-
"""Create a new instance based on the provided `Configuration`.
105+
"""Initialize a new instance based on the provided `Configuration`.
106106
107107
Args:
108108
config: The `Configuration` instance. Uses the global (default) one if not provided.
@@ -136,7 +136,7 @@ def active(self) -> bool:
136136
return self._active
137137

138138
async def __aenter__(self) -> Snapshotter:
139-
"""Starts capturing snapshots at configured intervals.
139+
"""Start capturing snapshots at configured intervals.
140140
141141
Raises:
142142
RuntimeError: If the context manager is already active.
@@ -158,7 +158,7 @@ async def __aexit__(
158158
exc_value: BaseException | None,
159159
exc_traceback: TracebackType | None,
160160
) -> None:
161-
"""Stops all resource capturing.
161+
"""Stop all resource capturing.
162162
163163
This method stops capturing snapshots of system resources (CPU, memory, event loop, and client information).
164164
It should be called to terminate resource capturing when it is no longer needed.
@@ -241,7 +241,7 @@ def _get_sample(snapshots: list[Snapshot], duration: timedelta | None = None) ->
241241
return [snapshot for snapshot in snapshots if latest_time - snapshot.created_at <= duration]
242242

243243
def _snapshot_cpu(self, event_data: EventSystemInfoData) -> None:
244-
"""Captures a snapshot of the current CPU usage.
244+
"""Capture a snapshot of the current CPU usage.
245245
246246
This method does not perform CPU usage measurement. Instead, it just reads the data received through
247247
the `event_data` parameter, which is expected to be supplied by the event manager.
@@ -260,7 +260,7 @@ def _snapshot_cpu(self, event_data: EventSystemInfoData) -> None:
260260
self._cpu_snapshots.add(snapshot)
261261

262262
def _snapshot_memory(self, event_data: EventSystemInfoData) -> None:
263-
"""Captures a snapshot of the current memory usage.
263+
"""Capture a snapshot of the current memory usage.
264264
265265
This method does not perform memory usage measurement. Instead, it just reads the data received through
266266
the `event_data` parameter, which is expected to be supplied by the event manager.
@@ -281,7 +281,7 @@ def _snapshot_memory(self, event_data: EventSystemInfoData) -> None:
281281
self._evaluate_memory_load(event_data.memory_info.current_size, event_data.memory_info.created_at)
282282

283283
def _snapshot_event_loop(self) -> None:
284-
"""Captures a snapshot of the current event loop usage.
284+
"""Capture a snapshot of the current event loop usage.
285285
286286
This method evaluates the event loop's latency by comparing the expected time between snapshots to the actual
287287
time elapsed since the last snapshot. The delay in the snapshot reflects the time deviation due to event loop
@@ -300,7 +300,7 @@ def _snapshot_event_loop(self) -> None:
300300
self._event_loop_snapshots.add(snapshot)
301301

302302
def _snapshot_client(self) -> None:
303-
"""Captures a snapshot of the current API state by checking for rate limit errors (HTTP 429).
303+
"""Capture a snapshot of the current API state by checking for rate limit errors (HTTP 429).
304304
305305
Only errors produced by a 2nd retry of the API call are considered for snapshotting since earlier errors may
306306
just be caused by a random spike in the number of requests and do not necessarily signify API overloading.
@@ -317,7 +317,7 @@ def _snapshot_client(self) -> None:
317317
self._client_snapshots.add(snapshot)
318318

319319
def _prune_snapshots(self, snapshots: list[Snapshot], now: datetime) -> None:
320-
"""Removes snapshots that are older than the `self._snapshot_history`.
320+
"""Remove snapshots that are older than the `self._snapshot_history`.
321321
322322
This method modifies the list of snapshots in place, removing all snapshots that are older than the defined
323323
snapshot history relative to the `now` parameter.
@@ -342,7 +342,7 @@ def _prune_snapshots(self, snapshots: list[Snapshot], now: datetime) -> None:
342342
snapshots.clear()
343343

344344
def _evaluate_memory_load(self, current_memory_usage_size: ByteSize, snapshot_timestamp: datetime) -> None:
345-
"""Evaluates and logs critical memory load conditions based on the system information.
345+
"""Evaluate and log critical memory load conditions based on the system information.
346346
347347
Args:
348348
current_memory_usage_size: The current memory usage.

src/crawlee/_autoscaling/system_status.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def __init__(
4646
event_loop_overload_threshold: float = 0.6,
4747
client_overload_threshold: float = 0.3,
4848
) -> None:
49-
"""A default constructor.
49+
"""Initialize a new instance.
5050
5151
Args:
5252
snapshotter: The `Snapshotter` instance to be queried for `SystemStatus`.
@@ -69,7 +69,7 @@ def __init__(
6969
self._client_overload_threshold = client_overload_threshold
7070

7171
def get_current_system_info(self) -> SystemInfo:
72-
"""Retrieves and evaluates the current status of system resources.
72+
"""Retrieve and evaluate the current status of system resources.
7373
7474
Considers snapshots within the `_max_snapshot_age` timeframe and determines if the system is currently
7575
overloaded based on predefined thresholds for each resource type.
@@ -80,7 +80,7 @@ def get_current_system_info(self) -> SystemInfo:
8080
return self._get_system_info(sample_duration=self._max_snapshot_age)
8181

8282
def get_historical_system_info(self) -> SystemInfo:
83-
"""Retrieves and evaluates the historical status of system resources.
83+
"""Retrieve and evaluate the historical status of system resources.
8484
8585
Considers the entire history of snapshots from the Snapshotter to assess long-term system performance and
8686
determines if the system has been historically overloaded.

src/crawlee/_log_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def __init__(
9191
*args: Any,
9292
**kwargs: Any,
9393
) -> None:
94-
"""A default constructor.
94+
"""Initialize a new instance.
9595
9696
Args:
9797
include_logger_name: Include logger name at the beginning of the log line.

src/crawlee/_types.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151

5252

5353
def _normalize_headers(headers: Mapping[str, str]) -> dict[str, str]:
54-
"""Converts all header keys to lowercase, strips whitespace, and returns them sorted by key."""
54+
"""Convert all header keys to lowercase, strip whitespace, and return them sorted by key."""
5555
normalized_headers = {k.lower().strip(): v.strip() for k, v in headers.items()}
5656
sorted_headers = sorted(normalized_headers.items())
5757
return dict(sorted_headers)
@@ -106,7 +106,7 @@ def __init__(
106106
max_tasks_per_minute: float = float('inf'),
107107
desired_concurrency: int | None = None,
108108
) -> None:
109-
"""A default constructor.
109+
"""Initialize a new instance.
110110
111111
Args:
112112
min_concurrency: The minimum number of tasks running in parallel. If you set this value too high
@@ -340,7 +340,7 @@ def __call__(
340340
transform_request_function: Callable[[RequestOptions], RequestOptions | RequestTransformAction] | None = None,
341341
**kwargs: Unpack[EnqueueLinksKwargs],
342342
) -> Coroutine[None, None, None]:
343-
"""A call dunder method.
343+
"""Call the enqueue links function.
344344
345345
Args:
346346
selector: A selector used to find the elements containing the links. The behaviour differs based
@@ -488,7 +488,7 @@ def __call__(
488488
method: HttpMethod = 'GET',
489489
headers: HttpHeaders | dict[str, str] | None = None,
490490
) -> Coroutine[None, None, HttpResponse]:
491-
"""A call dunder method.
491+
"""Call the send request function.
492492
493493
Args:
494494
url: The URL to send the request to.

src/crawlee/_utils/console.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010

1111
def make_table(rows: Sequence[Sequence[str]], width: int = 100) -> str:
12-
"""Creates a text table using Unicode characters.
12+
"""Create a text table using Unicode characters.
1313
1414
Args:
1515
rows: A list of tuples/lists to be displayed in the table.

src/crawlee/_utils/context.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,19 @@
88

99

1010
def ensure_context(method: T) -> T:
11-
"""Decorator to ensure the (async) context manager is initialized before calling the method.
11+
"""Ensure the (async) context manager is initialized before executing the method.
12+
13+
This decorator checks if the calling instance has an `active` attribute and verifies that it is set to `True`.
14+
If the instance is inactive, it raises a `RuntimeError`. Works for both synchronous and asynchronous methods.
1215
1316
Args:
1417
method: The method to wrap.
1518
1619
Returns:
1720
The wrapped method with context checking applied.
21+
22+
Raises:
23+
RuntimeError: If the instance lacks an `active` attribute or is not active.
1824
"""
1925

2026
@wraps(method)

src/crawlee/_utils/crypto.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
def compute_short_hash(data: bytes, *, length: int = 8) -> str:
8-
"""Computes a hexadecimal SHA-256 hash of the provided data and returns a substring (prefix) of it.
8+
"""Compute a hexadecimal SHA-256 hash of the provided data and return a substring (prefix) of it.
99
1010
Args:
1111
data: The binary data to be hashed.
@@ -19,6 +19,6 @@ def compute_short_hash(data: bytes, *, length: int = 8) -> str:
1919

2020

2121
def crypto_random_object_id(length: int = 17) -> str:
22-
"""Generates a random object ID."""
22+
"""Generate a random object ID."""
2323
chars = 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'
2424
return ''.join(secrets.choice(chars) for _ in range(length))

src/crawlee/_utils/docs.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,16 @@
66

77

88
def docs_group(group_name: GroupName) -> Callable: # noqa: ARG001
9-
"""Decorator to mark symbols for rendering and grouping in documentation.
9+
"""Mark a symbol for rendering and grouping in documentation.
1010
11-
This decorator is used purely for documentation purposes and does not alter the behavior
11+
This decorator is used solely for documentation purposes and does not modify the behavior
1212
of the decorated callable.
13+
14+
Args:
15+
group_name: The documentation group to which the symbol belongs.
16+
17+
Returns:
18+
The original callable without modification.
1319
"""
1420

1521
def wrapper(func: Callable) -> Callable:

0 commit comments

Comments
 (0)