Commit ff516ed

inherit from Storage class and RQ init

1 parent bdadd43, commit ff516ed

6 files changed, +174 -736 lines changed

src/crawlee/storage_clients/_base/_request_queue_client.py

Lines changed: 59 additions & 8 deletions

@@ -1,12 +1,14 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
+from datetime import datetime
 from typing import TYPE_CHECKING
 
 from crawlee._utils.docs import docs_group
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
+    from datetime import datetime
 
     from crawlee.storage_clients.models import (
         BatchRequestsOperationResponse,
@@ -15,7 +17,6 @@
         Request,
         RequestQueueHead,
         RequestQueueHeadWithLocks,
-        RequestQueueMetadata,
     )
 
 
@@ -27,17 +28,67 @@ class RequestQueueClient(ABC):
     client, like a memory storage client.
     """
 
+    @property
     @abstractmethod
-    async def get(self) -> RequestQueueMetadata | None:
-        """Get metadata about the request queue being managed by this client.
+    def id(self) -> str:
+        """The ID of the dataset."""
 
-        Returns:
-            An object containing the request queue's details, or None if the request queue does not exist.
-        """
+    @property
+    @abstractmethod
+    def name(self) -> str | None:
+        """The name of the dataset."""
+
+    @property
+    @abstractmethod
+    def created_at(self) -> datetime:
+        """The time at which the dataset was created."""
+
+    @property
+    @abstractmethod
+    def accessed_at(self) -> datetime:
+        """The time at which the dataset was last accessed."""
+
+    @property
+    @abstractmethod
+    def modified_at(self) -> datetime:
+        """The time at which the dataset was last modified."""
 
+    @property
     @abstractmethod
-    async def delete(self) -> None:
-        """Permanently delete the request queue managed by this client."""
+    def had_multiple_clients(self) -> bool:
+        """TODO."""
+
+    @property
+    @abstractmethod
+    def handled_request_count(self) -> int:
+        """TODO."""
+
+    @property
+    @abstractmethod
+    def pending_request_count(self) -> int:
+        """TODO."""
+
+    @property
+    @abstractmethod
+    def stats(self) -> dict:
+        """TODO."""
+
+    @property
+    @abstractmethod
+    def total_request_count(self) -> int:
+        """TODO."""
+
+    @property
+    @abstractmethod
+    def resource_directory(self) -> str:
+        """TODO."""
+
+    @abstractmethod
+    async def drop(self) -> None:
+        """Drop the whole request queue and remove all its values.
+
+        The backend method for the `RequestQueue.drop` call.
+        """
 
     @abstractmethod
     async def list_head(self, *, limit: int | None = None) -> RequestQueueHead:
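
The new surface replaces the `get()` and `delete()` coroutines with read-only metadata properties plus an async `drop()`, so a concrete client is expected to keep these values available for cheap synchronous reads. Below is a minimal, self-contained sketch of that pattern; `SketchQueueClient` and `InMemoryQueueClient` are hypothetical stand-ins for illustration (only a few of the properties are shown, and the request-handling methods such as `list_head` are omitted), not code from this commit.

```python
from __future__ import annotations

import asyncio
from abc import ABC, abstractmethod
from datetime import datetime, timezone


class SketchQueueClient(ABC):
    """Trimmed-down stand-in for a queue client exposing metadata as properties."""

    @property
    @abstractmethod
    def id(self) -> str:
        """The ID of the queue."""

    @property
    @abstractmethod
    def created_at(self) -> datetime:
        """The time at which the queue was created."""

    @property
    @abstractmethod
    def pending_request_count(self) -> int:
        """Number of requests waiting to be handled."""

    @abstractmethod
    async def drop(self) -> None:
        """Remove the queue and all of its data."""


class InMemoryQueueClient(SketchQueueClient):
    """Keeps everything in plain Python objects, so the properties are cheap local reads."""

    def __init__(self, id: str) -> None:
        self._id = id
        self._created_at = datetime.now(timezone.utc)
        self._pending: list[str] = []

    @property
    def id(self) -> str:
        return self._id

    @property
    def created_at(self) -> datetime:
        return self._created_at

    @property
    def pending_request_count(self) -> int:
        return len(self._pending)

    async def drop(self) -> None:
        self._pending.clear()


async def main() -> None:
    client = InMemoryQueueClient(id='default')
    print(client.id, client.pending_request_count)  # default 0
    await client.drop()


asyncio.run(main())
```

A backend that cannot answer these synchronously (for example, one backed by a remote API) would presumably have to cache the metadata locally and refresh it inside its async methods.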

src/crawlee/storages/_base.py

Lines changed: 3 additions & 8 deletions

@@ -6,7 +6,7 @@
 if TYPE_CHECKING:
     from crawlee.configuration import Configuration
     from crawlee.storage_clients._base import StorageClient
-    from crawlee.storage_clients.models import StorageMetadata
+    from crawlee.storage_clients.models import DatasetMetadata, KeyValueStoreMetadata, RequestQueueMetadata
 
 
 class Storage(ABC):
@@ -24,13 +24,8 @@ def name(self) -> str | None:
 
     @property
     @abstractmethod
-    def storage_object(self) -> StorageMetadata:
-        """Get the full storage object."""
-
-    @storage_object.setter
-    @abstractmethod
-    def storage_object(self, storage_object: StorageMetadata) -> None:
-        """Set the full storage object."""
+    def metadata(self) -> DatasetMetadata | KeyValueStoreMetadata | RequestQueueMetadata:
+        """Get the storage metadata."""
 
     @classmethod
     @abstractmethod
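
With the settable `storage_object` gone, `metadata` is a read-only property whose declared type is the union of the concrete metadata models. A subclass can still narrow that union to its own model, which is how `Dataset.metadata` is typed in the dataset file below. A minimal sketch of that narrowing, using simplified stand-in dataclasses rather than the real models (only two of the three models are shown):

```python
from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass


@dataclass
class DatasetMetadata:
    """Simplified stand-in for the real model."""
    id: str
    item_count: int


@dataclass
class RequestQueueMetadata:
    """Simplified stand-in for the real model."""
    id: str
    pending_request_count: int


class Storage(ABC):
    """Stand-in base class: metadata is read-only and typed as a union of models."""

    @property
    @abstractmethod
    def metadata(self) -> DatasetMetadata | RequestQueueMetadata:
        """Get the storage metadata."""


class Dataset(Storage):
    def __init__(self, id: str, item_count: int) -> None:
        self._id = id
        self._item_count = item_count

    @property
    def metadata(self) -> DatasetMetadata:  # narrowed, yet still a valid override
        return DatasetMetadata(id=self._id, item_count=self._item_count)


print(Dataset(id='default', item_count=3).metadata)
```

Return-type covariance is what makes the narrowed override acceptable to type checkers: every `DatasetMetadata` is a member of the declared union, so callers written against `Storage` still see a valid value.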

src/crawlee/storages/_creation_management.py

Lines changed: 0 additions & 210 deletions
This file was deleted.

src/crawlee/storages/_dataset.py

Lines changed: 9 additions & 2 deletions

@@ -5,11 +5,14 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Literal
 
+from typing_extensions import override
+
 from crawlee import service_locator
 from crawlee._utils.docs import docs_group
 from crawlee._utils.file import export_csv_to_stream, export_json_to_stream
 from crawlee.storage_clients.models import DatasetMetadata
 
+from ._base import Storage
 from ._key_value_store import KeyValueStore
 
 if TYPE_CHECKING:
@@ -28,7 +31,6 @@
 logger = logging.getLogger(__name__)
 
 # TODO:
-# - inherit from storage class
 # - caching / memoization of both datasets & dataset clients
 
 # Properties:
@@ -56,7 +58,7 @@
 
 
 @docs_group('Classes')
-class Dataset:
+class Dataset(Storage):
     """Dataset is an append-only structured storage, ideal for tabular data similar to database tables.
 
     The `Dataset` class is designed to store structured data, where each entry (row) maintains consistent attributes
@@ -98,14 +100,17 @@ def __init__(self, client: DatasetClient) -> None:
         """
        self._client = client
 
+    @override
     @property
     def id(self) -> str:
         return self._client.id
 
+    @override
     @property
     def name(self) -> str | None:
         return self._client.name
 
+    @override
     @property
     def metadata(self) -> DatasetMetadata:
         return DatasetMetadata(
@@ -117,6 +122,7 @@ def metadata(self) -> DatasetMetadata:
             item_count=self._client.item_count,
         )
 
+    @override
     @classmethod
     async def open(
         cls,
@@ -145,6 +151,7 @@ async def open(
 
         return cls(client)
 
+    @override
     async def drop(self) -> None:
         await self._client.drop()
 

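The diff marks every overridden member of `Dataset` with `typing_extensions.override`, using the decorator order from the commit (`@override` above `@property`). The decorator is a no-op at runtime; its value is that a static type checker such as mypy or pyright flags the subclass if the base-class member it shadows is later renamed or removed. A small self-contained sketch with illustrative stand-in classes, not the real crawlee ones:

```python
from __future__ import annotations

import asyncio
from abc import ABC, abstractmethod

from typing_extensions import override


class Storage(ABC):
    """Stand-in for the storage base class: one property, one async method."""

    @property
    @abstractmethod
    def id(self) -> str:
        """The ID of the storage."""

    @abstractmethod
    async def drop(self) -> None:
        """Drop the whole storage."""


class Dataset(Storage):
    """Stand-in subclass; every overriding member is tagged with @override."""

    def __init__(self, id: str) -> None:
        self._id = id

    @override
    @property
    def id(self) -> str:
        return self._id

    @override
    async def drop(self) -> None:
        print(f'dropping dataset {self._id}')

    # Renaming `drop` in Storage (or misspelling it here) would now surface as a
    # type-checker error at this definition instead of a silently dead method:
    #
    #     @override
    #     async def dorp(self) -> None:  # error: no matching member in Storage
    #         ...


async def main() -> None:
    storages: list[Storage] = [Dataset(id='default')]
    for storage in storages:
        await storage.drop()  # works for any Storage implementation


asyncio.run(main())
```

Because `Dataset` now satisfies the `Storage` ABC, helper code can be written once against `Storage` (for example, a cleanup routine that calls `drop()` on a mixed list of datasets, key-value stores, and request queues) instead of special-casing each storage type.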