Skip to content

Commit 7fb36b5

Browse files
authored
chore: replace os.path with pathlib.Path in tests (#1101)
### Description - Replace `os.path` with `pathlib.Path` in tests to improve readability and to establish a unified style of working with file paths.
1 parent 37f7610 commit 7fb36b5

File tree

7 files changed

+110
-106
lines changed

7 files changed

+110
-106
lines changed

src/crawlee/_utils/file.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
import re
1010
import shutil
1111
from enum import Enum
12-
from typing import Any
12+
from typing import TYPE_CHECKING, Any
13+
14+
if TYPE_CHECKING:
15+
from pathlib import Path
1316

1417

1518
class ContentType(Enum):
@@ -27,7 +30,7 @@ def is_content_type(content_type_enum: ContentType, content_type: str) -> bool:
2730
return content_type_enum.matches(content_type)
2831

2932

30-
async def force_remove(filename: str) -> None:
33+
async def force_remove(filename: str | Path) -> None:
3134
"""Removes a file, suppressing the FileNotFoundError if it does not exist.
3235
3336
JS-like rm(filename, { force: true }).
@@ -39,7 +42,7 @@ async def force_remove(filename: str) -> None:
3942
await asyncio.to_thread(os.remove, filename)
4043

4144

42-
async def force_rename(src_dir: str, dst_dir: str) -> None:
45+
async def force_rename(src_dir: str | Path, dst_dir: str | Path) -> None:
4346
"""Renames a directory, ensuring that the destination directory is removed if it exists.
4447
4548
Args:

tests/unit/_utils/test_file.py

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
from __future__ import annotations
22

33
import io
4-
import os
54
from datetime import datetime, timezone
6-
from typing import TYPE_CHECKING
5+
from pathlib import Path
76

87
import pytest
98

@@ -17,9 +16,6 @@
1716
json_dumps,
1817
)
1918

20-
if TYPE_CHECKING:
21-
from pathlib import Path
22-
2319

2420
async def test_json_dumps() -> None:
2521
assert await json_dumps({'key': 'value'}) == '{\n "key": "value"\n}'
@@ -112,42 +108,42 @@ def test_determine_file_extension() -> None:
112108

113109

114110
async def test_force_remove(tmp_path: Path) -> None:
115-
test_file_path = os.path.join(tmp_path, 'test.txt')
111+
test_file_path = Path(tmp_path, 'test.txt')
116112
# Does not crash/raise when the file does not exist
117-
assert os.path.exists(test_file_path) is False
113+
assert test_file_path.exists() is False
118114
await force_remove(test_file_path)
119-
assert os.path.exists(test_file_path) is False
115+
assert test_file_path.exists() is False
120116

121117
# Removes the file if it exists
122118
with open(test_file_path, 'a', encoding='utf-8'): # noqa: ASYNC230
123119
pass
124-
assert os.path.exists(test_file_path) is True
120+
assert test_file_path.exists() is True
125121
await force_remove(test_file_path)
126-
assert os.path.exists(test_file_path) is False
122+
assert test_file_path.exists() is False
127123

128124

129125
async def test_force_rename(tmp_path: Path) -> None:
130-
src_dir = os.path.join(tmp_path, 'src')
131-
dst_dir = os.path.join(tmp_path, 'dst')
132-
src_file = os.path.join(src_dir, 'src_dir.txt')
133-
dst_file = os.path.join(dst_dir, 'dst_dir.txt')
126+
src_dir = Path(tmp_path, 'src')
127+
dst_dir = Path(tmp_path, 'dst')
128+
src_file = Path(src_dir, 'src_dir.txt')
129+
dst_file = Path(dst_dir, 'dst_dir.txt')
134130
# Won't crash if source directory does not exist
135-
assert os.path.exists(src_dir) is False
131+
assert src_dir.exists() is False
136132
await force_rename(src_dir, dst_dir)
137133

138134
# Will remove dst_dir if it exists (also covers normal case)
139135
# Create the src_dir with a file in it
140-
os.mkdir(src_dir)
136+
src_dir.mkdir()
141137
with open(src_file, 'a', encoding='utf-8'): # noqa: ASYNC230
142138
pass
143139
# Create the dst_dir with a file in it
144-
os.mkdir(dst_dir)
140+
dst_dir.mkdir()
145141
with open(dst_file, 'a', encoding='utf-8'): # noqa: ASYNC230
146142
pass
147-
assert os.path.exists(src_file) is True
148-
assert os.path.exists(dst_file) is True
143+
assert src_file.exists() is True
144+
assert dst_file.exists() is True
149145
await force_rename(src_dir, dst_dir)
150-
assert os.path.exists(src_dir) is False
151-
assert os.path.exists(dst_file) is False
146+
assert src_dir.exists() is False
147+
assert dst_file.exists() is False
152148
# src_dir.txt should exist in dst_dir
153-
assert os.path.exists(os.path.join(dst_dir, 'src_dir.txt')) is True
149+
assert (dst_dir / 'src_dir.txt').exists() is True

tests/unit/storage_clients/_memory/test_dataset_client.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
import asyncio
4-
import os
4+
from pathlib import Path
55
from typing import TYPE_CHECKING
66

77
import pytest
@@ -53,17 +53,15 @@ async def test_update(dataset_client: DatasetClient) -> None:
5353

5454
old_dataset_info = await dataset_client.get()
5555
assert old_dataset_info is not None
56-
old_dataset_directory = os.path.join(
57-
dataset_client._memory_storage_client.datasets_directory, old_dataset_info.name or ''
58-
)
59-
new_dataset_directory = os.path.join(dataset_client._memory_storage_client.datasets_directory, new_dataset_name)
60-
assert os.path.exists(os.path.join(old_dataset_directory, '000000001.json')) is True
61-
assert os.path.exists(os.path.join(new_dataset_directory, '000000001.json')) is False
56+
old_dataset_directory = Path(dataset_client._memory_storage_client.datasets_directory, old_dataset_info.name or '')
57+
new_dataset_directory = Path(dataset_client._memory_storage_client.datasets_directory, new_dataset_name)
58+
assert (old_dataset_directory / '000000001.json').exists() is True
59+
assert (new_dataset_directory / '000000001.json').exists() is False
6260

6361
await asyncio.sleep(0.1)
6462
updated_dataset_info = await dataset_client.update(name=new_dataset_name)
65-
assert os.path.exists(os.path.join(old_dataset_directory, '000000001.json')) is False
66-
assert os.path.exists(os.path.join(new_dataset_directory, '000000001.json')) is True
63+
assert (old_dataset_directory / '000000001.json').exists() is False
64+
assert (new_dataset_directory / '000000001.json').exists() is True
6765
# Only modified_at and accessed_at should be different
6866
assert old_dataset_info.created_at == updated_dataset_info.created_at
6967
assert old_dataset_info.modified_at != updated_dataset_info.modified_at
@@ -78,10 +76,10 @@ async def test_delete(dataset_client: DatasetClient) -> None:
7876
await dataset_client.push_items({'abc': 123})
7977
dataset_info = await dataset_client.get()
8078
assert dataset_info is not None
81-
dataset_directory = os.path.join(dataset_client._memory_storage_client.datasets_directory, dataset_info.name or '')
82-
assert os.path.exists(os.path.join(dataset_directory, '000000001.json')) is True
79+
dataset_directory = Path(dataset_client._memory_storage_client.datasets_directory, dataset_info.name or '')
80+
assert (dataset_directory / '000000001.json').exists() is True
8381
await dataset_client.delete()
84-
assert os.path.exists(os.path.join(dataset_directory, '000000001.json')) is False
82+
assert (dataset_directory / '000000001.json').exists() is False
8583
# Does not crash when called again
8684
await dataset_client.delete()
8785

tests/unit/storage_clients/_memory/test_key_value_store_client.py

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import asyncio
44
import base64
55
import json
6-
import os
76
from datetime import datetime, timezone
7+
from pathlib import Path
88
from typing import TYPE_CHECKING
99

1010
import pytest
@@ -16,8 +16,6 @@
1616
from crawlee.storage_clients.models import KeyValueStoreMetadata, KeyValueStoreRecordMetadata
1717

1818
if TYPE_CHECKING:
19-
from pathlib import Path
20-
2119
from crawlee.storage_clients import MemoryStorageClient
2220
from crawlee.storage_clients._memory import KeyValueStoreClient
2321

@@ -79,19 +77,17 @@ async def test_update(key_value_store_client: KeyValueStoreClient) -> None:
7977
await key_value_store_client.set_record('test', {'abc': 123})
8078
old_kvs_info = await key_value_store_client.get()
8179
assert old_kvs_info is not None
82-
old_kvs_directory = os.path.join(
80+
old_kvs_directory = Path(
8381
key_value_store_client._memory_storage_client.key_value_stores_directory, old_kvs_info.name or ''
8482
)
85-
new_kvs_directory = os.path.join(
86-
key_value_store_client._memory_storage_client.key_value_stores_directory, new_kvs_name
87-
)
88-
assert os.path.exists(os.path.join(old_kvs_directory, 'test.json')) is True
89-
assert os.path.exists(os.path.join(new_kvs_directory, 'test.json')) is False
83+
new_kvs_directory = Path(key_value_store_client._memory_storage_client.key_value_stores_directory, new_kvs_name)
84+
assert (old_kvs_directory / 'test.json').exists() is True
85+
assert (new_kvs_directory / 'test.json').exists() is False
9086

9187
await asyncio.sleep(0.1)
9288
updated_kvs_info = await key_value_store_client.update(name=new_kvs_name)
93-
assert os.path.exists(os.path.join(old_kvs_directory, 'test.json')) is False
94-
assert os.path.exists(os.path.join(new_kvs_directory, 'test.json')) is True
89+
assert (old_kvs_directory / 'test.json').exists() is False
90+
assert (new_kvs_directory / 'test.json').exists() is True
9591
# Only modified_at and accessed_at should be different
9692
assert old_kvs_info.created_at == updated_kvs_info.created_at
9793
assert old_kvs_info.modified_at != updated_kvs_info.modified_at
@@ -106,12 +102,10 @@ async def test_delete(key_value_store_client: KeyValueStoreClient) -> None:
106102
await key_value_store_client.set_record('test', {'abc': 123})
107103
kvs_info = await key_value_store_client.get()
108104
assert kvs_info is not None
109-
kvs_directory = os.path.join(
110-
key_value_store_client._memory_storage_client.key_value_stores_directory, kvs_info.name or ''
111-
)
112-
assert os.path.exists(os.path.join(kvs_directory, 'test.json')) is True
105+
kvs_directory = Path(key_value_store_client._memory_storage_client.key_value_stores_directory, kvs_info.name or '')
106+
assert (kvs_directory / 'test.json').exists() is True
113107
await key_value_store_client.delete()
114-
assert os.path.exists(os.path.join(kvs_directory, 'test.json')) is False
108+
assert (kvs_directory / 'test.json').exists() is False
115109
# Does not crash when called again
116110
await key_value_store_client.delete()
117111

@@ -188,7 +182,7 @@ async def test_get_and_set_record(tmp_path: Path, key_value_store_client: KeyVal
188182
assert bytes_record_info.value.decode('utf-8') == bytes_value.decode('utf-8')
189183

190184
# Test using file descriptor
191-
with open(os.path.join(tmp_path, 'test.json'), 'w+', encoding='utf-8') as f: # noqa: ASYNC230
185+
with open(tmp_path / 'test.json', 'w+', encoding='utf-8') as f: # noqa: ASYNC230
192186
f.write('Test')
193187
with pytest.raises(NotImplementedError, match='File-like values are not supported in local memory storage'):
194188
await key_value_store_client.set_record('file', f)
@@ -283,12 +277,12 @@ async def test_writes_correct_metadata(
283277
)
284278

285279
# Check that everything was written correctly, both the data and metadata
286-
storage_path = os.path.join(memory_storage_client.key_value_stores_directory, key_value_store_name)
287-
item_path = os.path.join(storage_path, expected_output['filename'])
288-
item_metadata_path = os.path.join(storage_path, f'{expected_output["filename"]}.__metadata__.json')
280+
storage_path = Path(memory_storage_client.key_value_stores_directory, key_value_store_name)
281+
item_path = Path(storage_path, expected_output['filename'])
282+
item_metadata_path = storage_path / f'{expected_output["filename"]}.__metadata__.json'
289283

290-
assert os.path.exists(item_path)
291-
assert os.path.exists(item_metadata_path)
284+
assert item_path.exists()
285+
assert item_metadata_path.exists()
292286

293287
# Test the actual value of the item
294288
with open(item_path, 'rb') as item_file: # noqa: ASYNC230
@@ -396,8 +390,8 @@ async def test_reads_correct_metadata(
396390
key_value_store_name = crypto_random_object_id()
397391

398392
# Ensure the directory for the store exists
399-
storage_path = os.path.join(memory_storage_client.key_value_stores_directory, key_value_store_name)
400-
os.makedirs(storage_path, exist_ok=True)
393+
storage_path = Path(memory_storage_client.key_value_stores_directory, key_value_store_name)
394+
storage_path.mkdir(exist_ok=True, parents=True)
401395

402396
store_metadata = KeyValueStoreMetadata(
403397
id=crypto_random_object_id(),
@@ -409,12 +403,12 @@ async def test_reads_correct_metadata(
409403
)
410404

411405
# Write the store metadata to disk
412-
storage_metadata_path = os.path.join(storage_path, METADATA_FILENAME)
406+
storage_metadata_path = storage_path / METADATA_FILENAME
413407
with open(storage_metadata_path, mode='wb') as f: # noqa: ASYNC230
414408
f.write(store_metadata.model_dump_json().encode('utf-8'))
415409

416410
# Write the test input item to the disk
417-
item_path = os.path.join(storage_path, input_data['filename'])
411+
item_path = storage_path / input_data['filename']
418412
with open(item_path, 'wb') as item_file: # noqa: ASYNC230
419413
if isinstance(input_data['value'], bytes):
420414
item_file.write(input_data['value'])
@@ -426,7 +420,7 @@ async def test_reads_correct_metadata(
426420

427421
# Optionally write the metadata to disk if there is some
428422
if input_data['metadata'] is not None:
429-
storage_metadata_path = os.path.join(storage_path, input_data['filename'] + '.__metadata__.json')
423+
storage_metadata_path = storage_path / f'{input_data["filename"]}.__metadata__.json'
430424
with open(storage_metadata_path, 'w', encoding='utf-8') as metadata_file: # noqa: ASYNC230
431425
s = await json_dumps(
432426
{

0 commit comments

Comments
 (0)