1
1
from __future__ import annotations
2
2
3
3
import json
4
- import os
5
- from typing import TYPE_CHECKING
4
+ from pathlib import Path
5
+ from unittest .mock import AsyncMock , patch
6
+
7
+ import pytest
6
8
7
9
from crawlee ._consts import METADATA_FILENAME
8
10
from crawlee .storage_clients ._memory ._creation_management import persist_metadata_if_enabled
9
11
10
- if TYPE_CHECKING :
11
- from pathlib import Path
12
-
13
12
14
13
async def test_persist_metadata_skips_when_disabled (tmp_path : Path ) -> None :
15
14
await persist_metadata_if_enabled (data = {'key' : 'value' }, entity_directory = str (tmp_path ), write_metadata = False )
@@ -18,17 +17,43 @@ async def test_persist_metadata_skips_when_disabled(tmp_path: Path) -> None:
18
17
19
18
async def test_persist_metadata_creates_files_and_directories_when_enabled(tmp_path: Path) -> None:
    """Persisting with `write_metadata=True` creates the target directory and a metadata file inside it."""
    data = {'key': 'value'}
    # The test never creates `new_dir` itself, so its existence afterwards is due to the call under test.
    entity_directory = tmp_path / 'new_dir'
    await persist_metadata_if_enabled(data=data, entity_directory=str(entity_directory), write_metadata=True)
    # `is_dir()` rather than `exists()`: a regular file at this path must not satisfy the check.
    assert entity_directory.is_dir()  # Check if directory was created
    assert (entity_directory / METADATA_FILENAME).is_file()  # Check if file was created
25
24
26
25
27
26
async def test_persist_metadata_correctly_writes_data(tmp_path: Path) -> None:
    """The metadata file written to disk parses as JSON back into the dict that was passed in."""
    expected = {'key': 'value'}
    target_dir = tmp_path / 'data_dir'

    await persist_metadata_if_enabled(data=expected, entity_directory=str(target_dir), write_metadata=True)

    # Read the file back synchronously; a short blocking read is acceptable inside a test.
    raw = (target_dir / METADATA_FILENAME).read_text()
    assert json.loads(raw) == expected  # Check if correct data was written
34
+
35
+
36
async def test_persist_metadata_rewrites_data_with_error(tmp_path: Path) -> None:
    """Rewriting replaces the earlier metadata, and an interrupted rewrite leaves the metadata file empty."""
    first_payload = {'key': 'very_long_value'}
    second_payload = {'key': 'short_value'}
    error_payload = {'key': 'error'}

    metadata_dir = tmp_path / 'data_dir'
    metadata_file = metadata_dir / METADATA_FILENAME
    directory_arg = str(metadata_dir)

    # Initial write with the longer payload.
    await persist_metadata_if_enabled(data=first_payload, entity_directory=directory_arg, write_metadata=True)

    # Rewrite with the shorter payload; stale bytes left over from the first write would break JSON parsing.
    await persist_metadata_if_enabled(data=second_payload, entity_directory=directory_arg, write_metadata=True)
    assert json.loads(metadata_file.read_text()) == second_payload  # Check if correct data was rewritten

    # Raise an interrupt between opening the file and writing to it by making `json_dumps` fail.
    patched_name = 'crawlee.storage_clients._memory._creation_management.json_dumps'
    interrupting_dumps = AsyncMock(side_effect=KeyboardInterrupt())
    with patch(patched_name, interrupting_dumps), pytest.raises(KeyboardInterrupt):
        await persist_metadata_if_enabled(data=error_payload, entity_directory=directory_arg, write_metadata=True)

    assert metadata_file.read_text() == ''  # The file is empty after an error
0 commit comments