Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
82 commits
Select commit Hold shift + click to select a range
a3c5fa0
base implementation sql client
Mantisus Jul 29, 2025
3142bdd
resolve
Mantisus Jul 29, 2025
b056505
add dataset tests
Mantisus Jul 29, 2025
ae3bc3d
add kvs tests
Mantisus Jul 30, 2025
49f2643
add rq tests
Mantisus Jul 30, 2025
35a27fc
fix docs in tests
Mantisus Jul 30, 2025
52e1ad2
wrap `SQLStorageClient` in _try_import
Mantisus Jul 30, 2025
df41c45
update db models
Mantisus Jul 30, 2025
342c65a
dataset optimization
Mantisus Jul 30, 2025
61a2666
kvs optimization
Mantisus Jul 31, 2025
7055f7d
optimization
Mantisus Aug 1, 2025
1884f7d
reduce the refresh rate of `accessed_at`
Mantisus Aug 1, 2025
a10e3cf
up docs
Mantisus Aug 1, 2025
f7ebbe5
Update src/crawlee/storage_clients/_sql/_request_queue_client.py
Mantisus Aug 1, 2025
83ca6d3
fix tests
Mantisus Aug 1, 2025
1e3474c
Merge master
Mantisus Aug 3, 2025
8086ab2
same updates
Mantisus Aug 19, 2025
9ee93ab
resolve
Mantisus Aug 19, 2025
2934836
Merge branch 'master' into sql-client
Mantisus Aug 20, 2025
6401b65
up pyproject
Mantisus Aug 20, 2025
1c11d97
Merge branch 'master' into sql-client
Mantisus Aug 21, 2025
df927d1
refactor
Mantisus Aug 21, 2025
9f5e640
fix len strict for metadata_id in kvs_record
Mantisus Aug 21, 2025
77c1894
fix cache
Mantisus Aug 22, 2025
b3c1aad
update queue for support multi-clients
Mantisus Aug 23, 2025
fb8ce7d
fix metadata calculate
Mantisus Aug 23, 2025
63249bb
Add experimental warning
Mantisus Aug 23, 2025
0d62dcf
remove mysql
Mantisus Aug 24, 2025
dffeb76
raise Error for unsupported dialects
Mantisus Aug 24, 2025
61ba512
optimize update timestamps in metadata
Mantisus Aug 24, 2025
46e12b4
add docs
Mantisus Aug 24, 2025
41fcb35
Merge branch 'master' into sql-client
Mantisus Aug 25, 2025
b92e385
Update pyproject.toml
Mantisus Aug 25, 2025
045fe9c
up docs
Mantisus Aug 25, 2025
1a7618e
up database types
Mantisus Aug 26, 2025
cf1f722
Up names
Mantisus Aug 26, 2025
9328d9d
Update src/crawlee/storage_clients/_sql/_key_value_store_client.py
Mantisus Aug 26, 2025
9296d90
save session maker
Mantisus Aug 26, 2025
bdc1258
some updates
Mantisus Aug 26, 2025
9d47cff
Apply suggestion from @vdusek
Mantisus Aug 27, 2025
f69771e
Apply suggestion from @vdusek
Mantisus Aug 27, 2025
3d53ac2
Update docs/guides/storage_clients.mdx
Mantisus Aug 27, 2025
c7e3f8c
Update docs/guides/storage_clients.mdx
Mantisus Aug 27, 2025
5d05c06
Update src/crawlee/storage_clients/_sql/_client_mixin.py
Mantisus Aug 27, 2025
7a999a4
Update src/crawlee/storage_clients/_sql/_client_mixin.py
Mantisus Aug 27, 2025
bfec174
Update src/crawlee/storage_clients/_sql/_db_models.py
Mantisus Aug 27, 2025
a9b466f
Update src/crawlee/storage_clients/_sql/_db_models.py
Mantisus Aug 27, 2025
4443e98
Update src/crawlee/storage_clients/_sql/_storage_client.py
Mantisus Aug 27, 2025
245a4f9
Update docs/guides/storage_clients.mdx
Mantisus Aug 27, 2025
fb2937b
Update src/crawlee/storage_clients/_sql/_storage_client.py
Mantisus Aug 27, 2025
c3cc554
Update tests/unit/storages/test_request_queue.py
Mantisus Aug 27, 2025
05f59ca
polish sql-client
Mantisus Aug 27, 2025
473610d
Update docs/guides/storage_clients.mdx
Mantisus Aug 30, 2025
f17f6ca
Update docs/guides/storage_clients.mdx
Mantisus Aug 30, 2025
2ed4f06
Update docs/guides/storage_clients.mdx
Mantisus Aug 30, 2025
88a60f3
Update docs/guides/storage_clients.mdx
Mantisus Aug 30, 2025
a9b9671
chore(deps): update typescript-eslint monorepo to v8.41.0 (#1375)
renovate[bot] Aug 26, 2025
f8b2879
docs: Update `RequestLoader.fetch_next_request` docblock (#1374)
janbuchar Aug 26, 2025
4ba3a2e
chore(release): Update changelog and package version [skip ci]
Aug 26, 2025
1d0e531
chore(deps): update dependency types-cachetools to ~=6.2.0.20250827 (…
renovate[bot] Aug 27, 2025
5ae2c38
chore(deps): update yarn to v4.9.4 (#1377)
renovate[bot] Aug 27, 2025
ceaa9b5
docs: Update Request loaders guide (#1376)
vdusek Aug 27, 2025
3f0bf8a
chore: Fix accidentally missing name of the test (#1380)
Pijukatel Aug 28, 2025
3241785
feat: Persist the `SitemapRequestLoader` state (#1347)
Mantisus Aug 29, 2025
caff701
chore(release): Update changelog and package version [skip ci]
Aug 29, 2025
29cf5af
suppose warning
Mantisus Aug 30, 2025
bf47625
up code block
Mantisus Aug 30, 2025
b0e9f66
Merge branch 'master' into sql-client
Mantisus Sep 1, 2025
4d5ade3
up docs
Mantisus Sep 1, 2025
74f8825
drop cast
Mantisus Sep 1, 2025
d3a2ebc
fix docs
Mantisus Sep 1, 2025
7081fe4
clean docstrings
Mantisus Sep 1, 2025
b1a877e
extra optimization
Mantisus Sep 3, 2025
582adb0
Merge branch 'master' into sql-client
Mantisus Sep 3, 2025
d14c43a
handle create tables from several parallel processes
Mantisus Sep 3, 2025
f48887a
add column client_key
Mantisus Sep 8, 2025
cd44018
few updates
Mantisus Sep 11, 2025
e740f21
Merge branch 'master' into sql-client
Mantisus Sep 12, 2025
6e66337
add support for NDU storages
Mantisus Sep 12, 2025
289ab8b
fix
Mantisus Sep 13, 2025
4734699
resolve
Mantisus Sep 16, 2025
bb84523
resolve
Mantisus Sep 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ dependencies = [
]

[project.optional-dependencies]
all = ["crawlee[adaptive-crawler,beautifulsoup,cli,curl-impersonate,httpx,parsel,playwright,otel]"]
all = ["crawlee[adaptive-crawler,beautifulsoup,cli,curl-impersonate,httpx,parsel,playwright,otel,sql]"]
adaptive-crawler = [
"jaro-winkler>=2.0.3",
"playwright>=1.27.0",
Expand All @@ -73,6 +73,10 @@ otel = [
"opentelemetry-semantic-conventions>=0.54",
"wrapt>=1.17.0",
]
sql = [
"sqlalchemy[asyncio]>=2.0.42,<3.0.0",
"aiosqlite>=0.21.0",
]

[project.scripts]
crawlee = "crawlee._cli:cli"
Expand Down
12 changes: 12 additions & 0 deletions src/crawlee/storage_clients/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
from crawlee._utils.try_import import install_import_hook as _install_import_hook
from crawlee._utils.try_import import try_import as _try_import

# These imports have only mandatory dependencies, so they are imported directly.
from ._base import StorageClient
from ._file_system import FileSystemStorageClient
from ._memory import MemoryStorageClient

# Register the import hook for this package before any optional import is attempted.
# NOTE(review): presumably the hook is what reports a missing optional dependency when the
# affected name is later accessed — confirm in `crawlee._utils.try_import`.
_install_import_hook(__name__)

# The following imports are wrapped in try_import to handle optional dependencies,
# ensuring the module can still function even if these dependencies are missing.
with _try_import(__name__, 'SQLStorageClient'):
    from ._sql import SQLStorageClient

# Public API of this package, listed alphabetically.
__all__ = [
    'FileSystemStorageClient',
    'MemoryStorageClient',
    'SQLStorageClient',
    'StorageClient',
]
6 changes: 1 addition & 5 deletions src/crawlee/storage_clients/_file_system/_storage_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,7 @@ class FileSystemStorageClient(StorageClient):

@override
async def create_dataset_client(
self,
*,
id: str | None = None,
name: str | None = None,
configuration: Configuration | None = None,
self, *, id: str | None = None, name: str | None = None, configuration: Configuration | None = None
) -> FileSystemDatasetClient:
configuration = configuration or Configuration.get_global_configuration()
client = await FileSystemDatasetClient.open(id=id, name=name, configuration=configuration)
Expand Down
6 changes: 6 additions & 0 deletions src/crawlee/storage_clients/_sql/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from ._dataset_client import SQLDatasetClient
from ._key_value_store_client import SQLKeyValueStoreClient
from ._request_queue_client import SQLRequestQueueClient
from ._storage_client import SQLStorageClient

# Names re-exported by this package, one per line.
__all__ = [
    'SQLDatasetClient',
    'SQLKeyValueStoreClient',
    'SQLRequestQueueClient',
    'SQLStorageClient',
]
Loading
Loading