2 files changed: +12 -1 lines changed
src/crawlee/memory_storage_client
tests/unit/_memory_storage_client
@@ -181,6 +181,7 @@ def create_dataset_from_directory(
181
181
from crawlee .memory_storage_client ._dataset_client import DatasetClient
182
182
183
183
item_count = 0
184
+ has_seen_metadata_file = False
184
185
created_at = datetime .now (timezone .utc )
185
186
accessed_at = datetime .now (timezone .utc )
186
187
modified_at = datetime .now (timezone .utc )
@@ -189,6 +190,7 @@ def create_dataset_from_directory(
189
190
metadata_filepath = os .path .join (storage_directory , METADATA_FILENAME )
190
191
191
192
if os .path .exists (metadata_filepath ):
193
+ has_seen_metadata_file = True
192
194
with open (metadata_filepath , encoding = 'utf-8' ) as f :
193
195
json_content = json .load (f )
194
196
resource_info = DatasetMetadata (** json_content )
@@ -202,7 +204,6 @@ def create_dataset_from_directory(
202
204
203
205
# Load dataset entries
204
206
entries : dict [str , dict ] = {}
205
- has_seen_metadata_file = False
206
207
207
208
for entry in os .scandir (storage_directory ):
208
209
if entry .is_file ():
@@ -138,3 +138,13 @@ async def test_iterate_items(dataset_client: DatasetClient) -> None:
138
138
assert len (actual_items ) == item_count
139
139
assert actual_items [0 ]['id' ] == 0
140
140
assert actual_items [99 ]['id' ] == 99
141
+
142
+
143
async def test_reuse_dataset(dataset_client: DatasetClient, memory_storage_client: MemoryStorageClient) -> None:
    """Check that a re-opened dataset reports the number of items pushed earlier.

    Items are pushed through ``dataset_client``, the storage client's list of
    handled datasets is reset, and the dataset named 'test' is then obtained
    again via ``get_or_create``; its ``item_count`` must equal the number pushed.
    """
    expected_count = 10
    pushed_items = [{'id': index} for index in range(expected_count)]
    await dataset_client.push_items(pushed_items)

    # Purge datasets loaded to test create_dataset_from_directory.
    memory_storage_client.datasets_handled = []

    # NOTE(review): presumably 'test' is the name of the dataset behind the
    # dataset_client fixture — confirm against the fixture definition.
    reopened = await memory_storage_client.datasets().get_or_create(name='test')
    assert reopened.item_count == expected_count
You can’t perform that action at this time.
0 commit comments