From c3c571b938f607f4f3b670c03bcfd61a52234155 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Mon, 6 Oct 2025 16:14:24 +0000 Subject: [PATCH 1/6] add storages name validation --- src/crawlee/storages/_dataset.py | 3 + src/crawlee/storages/_key_value_store.py | 3 + src/crawlee/storages/_request_queue.py | 3 + .../storages/_storage_instance_manager.py | 6 ++ src/crawlee/storages/_utils.py | 11 +++ .../test_adaptive_playwright_crawler.py | 2 +- tests/unit/otel/test_crawler_instrumentor.py | 2 +- tests/unit/sessions/test_session_pool.py | 2 +- .../_file_system/test_fs_dataset_client.py | 6 +- .../_file_system/test_fs_kvs_client.py | 6 +- .../_file_system/test_fs_rq_client.py | 6 +- .../_memory/test_memory_dataset_client.py | 6 +- .../_memory/test_memory_kvs_client.py | 6 +- .../_memory/test_memory_rq_client.py | 6 +- .../_sql/test_sql_dataset_client.py | 12 +-- .../_sql/test_sql_kvs_client.py | 12 +-- .../_sql/test_sql_rq_client.py | 12 +-- tests/unit/storages/test_dataset.py | 96 ++++++++++++------- tests/unit/storages/test_key_value_store.py | 90 +++++++++++------ tests/unit/storages/test_request_queue.py | 94 ++++++++++++------ 20 files changed, 253 insertions(+), 131 deletions(-) create mode 100644 src/crawlee/storages/_utils.py diff --git a/src/crawlee/storages/_dataset.py b/src/crawlee/storages/_dataset.py index 2001a8fa11..fdcc9b6441 100644 --- a/src/crawlee/storages/_dataset.py +++ b/src/crawlee/storages/_dataset.py @@ -12,6 +12,7 @@ from ._base import Storage from ._key_value_store import KeyValueStore +from ._utils import validate_storage_name if TYPE_CHECKING: from collections.abc import AsyncIterator @@ -75,6 +76,8 @@ def __init__(self, client: DatasetClient, id: str, name: str | None) -> None: id: The unique identifier of the storage. name: The name of the storage, if available. """ + validate_storage_name(name) + self._client = client self._id = id self._name = name diff --git a/src/crawlee/storages/_key_value_store.py b/src/crawlee/storages/_key_value_store.py index 3260e4f91e..bdba9dcbf3 100644 --- a/src/crawlee/storages/_key_value_store.py +++ b/src/crawlee/storages/_key_value_store.py @@ -15,6 +15,7 @@ from crawlee.storage_clients.models import KeyValueStoreMetadata from ._base import Storage +from ._utils import validate_storage_name if TYPE_CHECKING: from collections.abc import AsyncIterator @@ -84,6 +85,8 @@ def __init__(self, client: KeyValueStoreClient, id: str, name: str | None) -> No id: The unique identifier of the storage. name: The name of the storage, if available. """ + validate_storage_name(name) + self._client = client self._id = id self._name = name diff --git a/src/crawlee/storages/_request_queue.py b/src/crawlee/storages/_request_queue.py index 4e210d8b15..d079e0d1a2 100644 --- a/src/crawlee/storages/_request_queue.py +++ b/src/crawlee/storages/_request_queue.py @@ -13,6 +13,7 @@ from crawlee.request_loaders import RequestManager from ._base import Storage +from ._utils import validate_storage_name if TYPE_CHECKING: from collections.abc import Sequence @@ -80,6 +81,8 @@ def __init__(self, client: RequestQueueClient, id: str, name: str | None) -> Non id: The unique identifier of the storage. name: The name of the storage, if available. 
""" + validate_storage_name(name) + self._client = client self._id = id self._name = name diff --git a/src/crawlee/storages/_storage_instance_manager.py b/src/crawlee/storages/_storage_instance_manager.py index 2353d35570..4088c4102e 100644 --- a/src/crawlee/storages/_storage_instance_manager.py +++ b/src/crawlee/storages/_storage_instance_manager.py @@ -8,6 +8,8 @@ from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient +from ._utils import validate_storage_name + if TYPE_CHECKING: from ._base import Storage @@ -146,6 +148,10 @@ async def open_storage_instance( f'Use a different name or drop the existing alias storage first.' ) + # Validate storage name + if name is not None: + validate_storage_name(name) + # Create new instance client: KeyValueStoreClient | DatasetClient | RequestQueueClient client = await client_opener_coro diff --git a/src/crawlee/storages/_utils.py b/src/crawlee/storages/_utils.py new file mode 100644 index 0000000000..17e1fcc55c --- /dev/null +++ b/src/crawlee/storages/_utils.py @@ -0,0 +1,11 @@ +import re + +NAME_REGEX = re.compile(r'^([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])$') + + +def validate_storage_name(name: str | None) -> None: + if name and not NAME_REGEX.match(name): + raise ValueError( + f'Invalid storage name "{name}". Name can only contain letters "a" through "z", the digits "0" through' + '"9", and the hyphen ("-") but only in the middle of the string (e.g. "my-value-1")' + ) diff --git a/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py b/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py index 5fdb621718..5c21328860 100644 --- a/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py +++ b/tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py @@ -473,7 +473,7 @@ async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None: async def test_adaptive_playwright_crawler_statistics_in_init() -> None: """Tests that adaptive crawler uses created AdaptivePlaywrightCrawlerStatistics from inputted Statistics.""" persistence_enabled = True - persist_state_kvs_name = 'some name' + persist_state_kvs_name = 'some-name' persist_state_key = 'come key' log_message = 'some message' periodic_message_logger = logging.getLogger('some logger') diff --git a/tests/unit/otel/test_crawler_instrumentor.py b/tests/unit/otel/test_crawler_instrumentor.py index c9d5198041..5b841ea035 100644 --- a/tests/unit/otel/test_crawler_instrumentor.py +++ b/tests/unit/otel/test_crawler_instrumentor.py @@ -38,7 +38,7 @@ async def test_crawler_instrumentor_capability(server_url: URL) -> None: # Generate first telemetry data from `Dataset` public methods. # `Dataset` is in `instrument_classes` argument, and thus it's public methods are instrumented. - dataset = await Dataset.open(name='test_dataset') + dataset = await Dataset.open(name='test-dataset') await dataset.drop() # Other traces will be from crawler run. 
diff --git a/tests/unit/sessions/test_session_pool.py b/tests/unit/sessions/test_session_pool.py index 2db7464d4f..abad0e5866 100644 --- a/tests/unit/sessions/test_session_pool.py +++ b/tests/unit/sessions/test_session_pool.py @@ -17,7 +17,7 @@ from collections.abc import AsyncGenerator MAX_POOL_SIZE = 3 -KVS_NAME = 'test_session_pool' +KVS_NAME = 'test-session-pool' PERSIST_STATE_KEY = 'crawlee_session_pool_state' diff --git a/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py b/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py index d3e5c6d9cf..fdf8a80cd6 100644 --- a/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py +++ b/tests/unit/storage_clients/_file_system/test_fs_dataset_client.py @@ -27,14 +27,14 @@ def configuration(tmp_path: Path) -> Configuration: @pytest.fixture async def dataset_client(configuration: Configuration) -> AsyncGenerator[FileSystemDatasetClient, None]: """A fixture for a file system dataset client.""" - client = await FileSystemStorageClient().create_dataset_client(name='test_dataset', configuration=configuration) + client = await FileSystemStorageClient().create_dataset_client(name='test-dataset', configuration=configuration) yield client await client.drop() async def test_file_and_directory_creation(configuration: Configuration) -> None: """Test that file system dataset creates proper files and directories.""" - client = await FileSystemStorageClient().create_dataset_client(name='new_dataset', configuration=configuration) + client = await FileSystemStorageClient().create_dataset_client(name='new-dataset', configuration=configuration) # Verify files were created assert client.path_to_dataset.exists() @@ -45,7 +45,7 @@ async def test_file_and_directory_creation(configuration: Configuration) -> None metadata = json.load(f) client_metadata = await client.get_metadata() assert metadata['id'] == client_metadata.id - assert metadata['name'] == 'new_dataset' + assert metadata['name'] == 'new-dataset' assert metadata['item_count'] == 0 await client.drop() diff --git a/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py b/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py index b9702299a0..9e5f9c59f0 100644 --- a/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py +++ b/tests/unit/storage_clients/_file_system/test_fs_kvs_client.py @@ -27,14 +27,14 @@ def configuration(tmp_path: Path) -> Configuration: @pytest.fixture async def kvs_client(configuration: Configuration) -> AsyncGenerator[FileSystemKeyValueStoreClient, None]: """A fixture for a file system key-value store client.""" - client = await FileSystemStorageClient().create_kvs_client(name='test_kvs', configuration=configuration) + client = await FileSystemStorageClient().create_kvs_client(name='test-kvs', configuration=configuration) yield client await client.drop() async def test_file_and_directory_creation(configuration: Configuration) -> None: """Test that file system KVS creates proper files and directories.""" - client = await FileSystemStorageClient().create_kvs_client(name='new_kvs', configuration=configuration) + client = await FileSystemStorageClient().create_kvs_client(name='new-kvs', configuration=configuration) # Verify files were created assert client.path_to_kvs.exists() @@ -44,7 +44,7 @@ async def test_file_and_directory_creation(configuration: Configuration) -> None with client.path_to_metadata.open() as f: metadata = json.load(f) assert metadata['id'] == (await client.get_metadata()).id - assert metadata['name'] == 
'new_kvs' + assert metadata['name'] == 'new-kvs' await client.drop() diff --git a/tests/unit/storage_clients/_file_system/test_fs_rq_client.py b/tests/unit/storage_clients/_file_system/test_fs_rq_client.py index dc2937a259..f47e58528f 100644 --- a/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +++ b/tests/unit/storage_clients/_file_system/test_fs_rq_client.py @@ -28,7 +28,7 @@ def configuration(tmp_path: Path) -> Configuration: async def rq_client() -> AsyncGenerator[FileSystemRequestQueueClient, None]: """A fixture for a file system request queue client.""" client = await FileSystemStorageClient().create_rq_client( - name='test_request_queue', + name='test-request-queue', ) yield client await client.drop() @@ -36,7 +36,7 @@ async def rq_client() -> AsyncGenerator[FileSystemRequestQueueClient, None]: async def test_file_and_directory_creation() -> None: """Test that file system RQ creates proper files and directories.""" - client = await FileSystemStorageClient().create_rq_client(name='new_request_queue') + client = await FileSystemStorageClient().create_rq_client(name='new-request-queue') # Verify files were created assert client.path_to_rq.exists() @@ -46,7 +46,7 @@ async def test_file_and_directory_creation() -> None: with client.path_to_metadata.open() as f: metadata = json.load(f) assert metadata['id'] == (await client.get_metadata()).id - assert metadata['name'] == 'new_request_queue' + assert metadata['name'] == 'new-request-queue' await client.drop() diff --git a/tests/unit/storage_clients/_memory/test_memory_dataset_client.py b/tests/unit/storage_clients/_memory/test_memory_dataset_client.py index c503374ce7..ccb29dec32 100644 --- a/tests/unit/storage_clients/_memory/test_memory_dataset_client.py +++ b/tests/unit/storage_clients/_memory/test_memory_dataset_client.py @@ -16,7 +16,7 @@ @pytest.fixture async def dataset_client() -> AsyncGenerator[MemoryDatasetClient, None]: """Fixture that provides a fresh memory dataset client for each test.""" - client = await MemoryStorageClient().create_dataset_client(name='test_dataset') + client = await MemoryStorageClient().create_dataset_client(name='test-dataset') yield client await client.drop() @@ -25,7 +25,7 @@ async def test_memory_specific_purge_behavior() -> None: """Test memory-specific purge behavior and in-memory storage characteristics.""" # Create dataset and add data dataset_client1 = await MemoryStorageClient().create_dataset_client( - name='test_purge_dataset', + name='test-purge-dataset', ) await dataset_client1.push_data({'item': 'initial data'}) @@ -35,7 +35,7 @@ async def test_memory_specific_purge_behavior() -> None: # Reopen with same storage client instance dataset_client2 = await MemoryStorageClient().create_dataset_client( - name='test_purge_dataset', + name='test-purge-dataset', ) # Verify data was purged (memory storage specific behavior) diff --git a/tests/unit/storage_clients/_memory/test_memory_kvs_client.py b/tests/unit/storage_clients/_memory/test_memory_kvs_client.py index ef55107393..4dfc44085e 100644 --- a/tests/unit/storage_clients/_memory/test_memory_kvs_client.py +++ b/tests/unit/storage_clients/_memory/test_memory_kvs_client.py @@ -16,7 +16,7 @@ @pytest.fixture async def kvs_client() -> AsyncGenerator[MemoryKeyValueStoreClient, None]: """Fixture that provides a fresh memory key-value store client for each test.""" - client = await MemoryStorageClient().create_kvs_client(name='test_kvs') + client = await MemoryStorageClient().create_kvs_client(name='test-kvs') yield client await client.drop() 
@@ -26,7 +26,7 @@ async def test_memory_specific_purge_behavior() -> None: # Create KVS and add data kvs_client1 = await MemoryStorageClient().create_kvs_client( - name='test_purge_kvs', + name='test-purge-kvs', ) await kvs_client1.set_value(key='test-key', value='initial value') @@ -37,7 +37,7 @@ async def test_memory_specific_purge_behavior() -> None: # Reopen with same storage client instance kvs_client2 = await MemoryStorageClient().create_kvs_client( - name='test_purge_kvs', + name='test-purge-kvs', ) # Verify value was purged (memory storage specific behavior) diff --git a/tests/unit/storage_clients/_memory/test_memory_rq_client.py b/tests/unit/storage_clients/_memory/test_memory_rq_client.py index 8bfe8632df..1846712084 100644 --- a/tests/unit/storage_clients/_memory/test_memory_rq_client.py +++ b/tests/unit/storage_clients/_memory/test_memory_rq_client.py @@ -17,7 +17,7 @@ @pytest.fixture async def rq_client() -> AsyncGenerator[MemoryRequestQueueClient, None]: """Fixture that provides a fresh memory request queue client for each test.""" - client = await MemoryStorageClient().create_rq_client(name='test_rq') + client = await MemoryStorageClient().create_rq_client(name='test-rq') yield client await client.drop() @@ -26,7 +26,7 @@ async def test_memory_specific_purge_behavior() -> None: """Test memory-specific purge behavior and in-memory storage characteristics.""" # Create RQ and add data rq_client1 = await MemoryStorageClient().create_rq_client( - name='test_purge_rq', + name='test-purge-rq', ) request = Request.from_url(url='https://example.com/initial') await rq_client1.add_batch_of_requests([request]) @@ -36,7 +36,7 @@ async def test_memory_specific_purge_behavior() -> None: # Reopen with same storage client instance rq_client2 = await MemoryStorageClient().create_rq_client( - name='test_purge_rq', + name='test-purge-rq', ) # Verify queue was purged (memory storage specific behavior) diff --git a/tests/unit/storage_clients/_sql/test_sql_dataset_client.py b/tests/unit/storage_clients/_sql/test_sql_dataset_client.py index 2525d6db22..6b94e146d3 100644 --- a/tests/unit/storage_clients/_sql/test_sql_dataset_client.py +++ b/tests/unit/storage_clients/_sql/test_sql_dataset_client.py @@ -44,7 +44,7 @@ async def dataset_client( async with SqlStorageClient() as storage_client: monkeypatch.setattr(storage_client, '_accessed_modified_update_interval', timedelta(seconds=0)) client = await storage_client.create_dataset_client( - name='test_dataset', + name='test-dataset', configuration=configuration, ) yield client @@ -57,7 +57,7 @@ async def test_create_tables_with_connection_string(configuration: Configuration async with SqlStorageClient(connection_string=f'sqlite+aiosqlite:///{storage_dir}') as storage_client: await storage_client.create_dataset_client( - name='new_dataset', + name='new-dataset', configuration=configuration, ) @@ -75,7 +75,7 @@ async def test_create_tables_with_engine(configuration: Configuration, tmp_path: async with SqlStorageClient(engine=engine) as storage_client: await storage_client.create_dataset_client( - name='new_dataset', + name='new-dataset', configuration=configuration, ) @@ -89,7 +89,7 @@ async def test_tables_and_metadata_record(configuration: Configuration) -> None: """Test that SQL dataset creates proper tables and metadata records.""" async with SqlStorageClient() as storage_client: client = await storage_client.create_dataset_client( - name='new_dataset', + name='new-dataset', configuration=configuration, ) @@ -101,12 +101,12 @@ async def 
test_tables_and_metadata_record(configuration: Configuration) -> None: assert 'datasets' in tables async with client.get_session() as session: - stmt = select(DatasetMetadataDb).where(DatasetMetadataDb.name == 'new_dataset') + stmt = select(DatasetMetadataDb).where(DatasetMetadataDb.name == 'new-dataset') result = await session.execute(stmt) orm_metadata = result.scalar_one_or_none() assert orm_metadata is not None assert orm_metadata.id == client_metadata.id - assert orm_metadata.name == 'new_dataset' + assert orm_metadata.name == 'new-dataset' assert orm_metadata.item_count == 0 await client.drop() diff --git a/tests/unit/storage_clients/_sql/test_sql_kvs_client.py b/tests/unit/storage_clients/_sql/test_sql_kvs_client.py index a2152f0b01..6bd02df750 100644 --- a/tests/unit/storage_clients/_sql/test_sql_kvs_client.py +++ b/tests/unit/storage_clients/_sql/test_sql_kvs_client.py @@ -40,7 +40,7 @@ async def kvs_client( async with SqlStorageClient() as storage_client: monkeypatch.setattr(storage_client, '_accessed_modified_update_interval', timedelta(seconds=0)) client = await storage_client.create_kvs_client( - name='test_kvs', + name='test-kvs', configuration=configuration, ) monkeypatch.setattr(client, '_accessed_modified_update_interval', timedelta(seconds=0)) @@ -60,7 +60,7 @@ async def test_create_tables_with_connection_string(configuration: Configuration async with SqlStorageClient(connection_string=f'sqlite+aiosqlite:///{storage_dir}') as storage_client: await storage_client.create_kvs_client( - name='new_kvs', + name='new-kvs', configuration=configuration, ) @@ -78,7 +78,7 @@ async def test_create_tables_with_engine(configuration: Configuration, tmp_path: async with SqlStorageClient(engine=engine) as storage_client: await storage_client.create_kvs_client( - name='new_kvs', + name='new-kvs', configuration=configuration, ) @@ -92,7 +92,7 @@ async def test_tables_and_metadata_record(configuration: Configuration) -> None: """Test that SQL key-value store creates proper tables and metadata records.""" async with SqlStorageClient() as storage_client: client = await storage_client.create_kvs_client( - name='new_kvs', + name='new-kvs', configuration=configuration, ) @@ -104,12 +104,12 @@ async def test_tables_and_metadata_record(configuration: Configuration) -> None: assert 'key_value_store_records' in tables async with client.get_session() as session: - stmt = select(KeyValueStoreMetadataDb).where(KeyValueStoreMetadataDb.name == 'new_kvs') + stmt = select(KeyValueStoreMetadataDb).where(KeyValueStoreMetadataDb.name == 'new-kvs') result = await session.execute(stmt) orm_metadata = result.scalar_one_or_none() metadata = KeyValueStoreMetadata.model_validate(orm_metadata) assert metadata.id == client_metadata.id - assert metadata.name == 'new_kvs' + assert metadata.name == 'new-kvs' await client.drop() diff --git a/tests/unit/storage_clients/_sql/test_sql_rq_client.py b/tests/unit/storage_clients/_sql/test_sql_rq_client.py index 117252bcf9..8885f3cf88 100644 --- a/tests/unit/storage_clients/_sql/test_sql_rq_client.py +++ b/tests/unit/storage_clients/_sql/test_sql_rq_client.py @@ -41,7 +41,7 @@ async def rq_client( async with SqlStorageClient() as storage_client: monkeypatch.setattr(storage_client, '_accessed_modified_update_interval', timedelta(seconds=0)) client = await storage_client.create_rq_client( - name='test_request_queue', + name='test-request-queue', configuration=configuration, ) monkeypatch.setattr(client, '_accessed_modified_update_interval', timedelta(seconds=0)) @@ -61,7 +61,7 @@ 
async def test_create_tables_with_connection_string(configuration: Configuration async with SqlStorageClient(connection_string=f'sqlite+aiosqlite:///{storage_dir}') as storage_client: await storage_client.create_rq_client( - name='test_request_queue', + name='test-request-queue', configuration=configuration, ) @@ -80,7 +80,7 @@ async def test_create_tables_with_engine(configuration: Configuration, tmp_path: async with SqlStorageClient(engine=engine) as storage_client: await storage_client.create_rq_client( - name='test_request_queue', + name='test-request-queue', configuration=configuration, ) @@ -95,7 +95,7 @@ async def test_tables_and_metadata_record(configuration: Configuration) -> None: """Test that SQL request queue creates proper tables and metadata records.""" async with SqlStorageClient() as storage_client: client = await storage_client.create_rq_client( - name='test_request_queue', + name='test-request-queue', configuration=configuration, ) @@ -108,12 +108,12 @@ async def test_tables_and_metadata_record(configuration: Configuration) -> None: assert 'request_queue_state' in tables async with client.get_session() as session: - stmt = select(RequestQueueMetadataDb).where(RequestQueueMetadataDb.name == 'test_request_queue') + stmt = select(RequestQueueMetadataDb).where(RequestQueueMetadataDb.name == 'test-request-queue') result = await session.execute(stmt) orm_metadata = result.scalar_one_or_none() metadata = RequestQueueMetadata.model_validate(orm_metadata) assert metadata.id == client_metadata.id - assert metadata.name == 'test_request_queue' + assert metadata.name == 'test-request-queue' await client.drop() diff --git a/tests/unit/storages/test_dataset.py b/tests/unit/storages/test_dataset.py index 4d5db8dfe4..a71f68dfa6 100644 --- a/tests/unit/storages/test_dataset.py +++ b/tests/unit/storages/test_dataset.py @@ -40,13 +40,13 @@ async def test_open_creates_new_dataset( ) -> None: """Test that open() creates a new dataset with proper metadata.""" dataset = await Dataset.open( - name='new_dataset', + name='new-dataset', storage_client=storage_client, ) # Verify dataset properties assert dataset.id is not None - assert dataset.name == 'new_dataset' + assert dataset.name == 'new-dataset' metadata = await dataset.get_metadata() assert metadata.item_count == 0 @@ -98,7 +98,7 @@ async def test_open_by_id( """Test opening a dataset by its ID.""" # First create a dataset by name dataset1 = await Dataset.open( - name='dataset_by_id_test', + name='dataset-by-id-test', storage_client=storage_client, ) @@ -114,7 +114,7 @@ async def test_open_by_id( # Verify it's the same dataset assert dataset2.id == dataset1.id - assert dataset2.name == 'dataset_by_id_test' + assert dataset2.name == 'dataset-by-id-test' # Verify the data is still there data = await dataset2.get_data() @@ -373,7 +373,7 @@ async def test_drop( ) -> None: """Test dropping a dataset removes it from cache and clears its data.""" dataset = await Dataset.open( - name='drop_test', + name='drop-test', storage_client=storage_client, ) @@ -385,7 +385,7 @@ async def test_drop( # Verify dataset is empty (by creating a new one with the same name) new_dataset = await Dataset.open( - name='drop_test', + name='drop-test', storage_client=storage_client, ) @@ -401,7 +401,7 @@ async def test_export_to_json( """Test exporting dataset to JSON format.""" # Create a key-value store for export kvs = await KeyValueStore.open( - name='export_kvs', + name='export-kvs', ) # Add some items to the dataset @@ -416,7 +416,7 @@ async def test_export_to_json( 
await dataset.export_to( key='dataset_export.json', content_type='json', - to_kvs_name='export_kvs', + to_kvs_name='export-kvs', to_kvs_storage_client=storage_client, ) @@ -439,7 +439,7 @@ async def test_export_to_csv( """Test exporting dataset to CSV format.""" # Create a key-value store for export kvs = await KeyValueStore.open( - name='export_kvs', + name='export-kvs', storage_client=storage_client, ) @@ -455,7 +455,7 @@ async def test_export_to_csv( await dataset.export_to( key='dataset_export.csv', content_type='csv', - to_kvs_name='export_kvs', + to_kvs_name='export-kvs', to_kvs_storage_client=storage_client, ) @@ -483,7 +483,7 @@ async def test_export_to_invalid_content_type(dataset: Dataset) -> None: async def test_export_with_multiple_kwargs(dataset: Dataset, tmp_path: Path) -> None: """Test exporting dataset using many optional arguments together.""" - target_kvs_name = 'some_kvs' + target_kvs_name = 'some-kvs' target_storage_client = FileSystemStorageClient() export_key = 'exported_dataset' data = {'some key': 'some data'} @@ -542,7 +542,7 @@ async def test_purge( """Test purging a dataset removes all data but keeps the dataset itself.""" # First create a dataset dataset = await Dataset.open( - name='purge_test_dataset', + name='purge-test-dataset', storage_client=storage_client, ) @@ -569,7 +569,7 @@ async def test_purge( # Verify the dataset still exists but is empty assert dataset.id == dataset_id # Same ID preserved - assert dataset.name == 'purge_test_dataset' # Same name preserved + assert dataset.name == 'purge-test-dataset' # Same name preserved # Dataset should be empty now data = await dataset.get_data() @@ -739,23 +739,23 @@ async def test_named_vs_alias_conflict_detection( ) -> None: """Test that conflicts between named and alias storages are detected.""" # Test 1: Create named storage first, then try alias with same name - named_dataset = await Dataset.open(name='conflict_test', storage_client=storage_client) - assert named_dataset.name == 'conflict_test' + named_dataset = await Dataset.open(name='conflict-test', storage_client=storage_client) + assert named_dataset.name == 'conflict-test' # Try to create alias with same name - should raise error - with pytest.raises(ValueError, match=r'Cannot create alias storage "conflict_test".*already exists'): - await Dataset.open(alias='conflict_test', storage_client=storage_client) + with pytest.raises(ValueError, match=r'Cannot create alias storage "conflict-test".*already exists'): + await Dataset.open(alias='conflict-test', storage_client=storage_client) # Clean up await named_dataset.drop() # Test 2: Create alias first, then try named with same name - alias_dataset = await Dataset.open(alias='conflict_test2', storage_client=storage_client) + alias_dataset = await Dataset.open(alias='conflict-test2', storage_client=storage_client) assert alias_dataset.name is None # Alias storages have no name # Try to create named with same name - should raise error - with pytest.raises(ValueError, match=r'Cannot create named storage "conflict_test2".*already exists'): - await Dataset.open(name='conflict_test2', storage_client=storage_client) + with pytest.raises(ValueError, match=r'Cannot create named storage "conflict-test2".*already exists'): + await Dataset.open(name='conflict-test2', storage_client=storage_client) # Clean up await alias_dataset.drop() @@ -790,12 +790,12 @@ async def test_alias_vs_named_isolation( """Test that alias and named datasets with same identifier are isolated.""" # Create named dataset named_dataset = await 
Dataset.open( - name='test_identifier', + name='test-identifier', storage_client=storage_client, ) # Verify named dataset - assert named_dataset.name == 'test_identifier' + assert named_dataset.name == 'test-identifier' await named_dataset.push_data({'type': 'named'}) # Clean up named dataset first @@ -887,13 +887,13 @@ async def test_purge_on_start_enabled(storage_client: StorageClient) -> None: ) alias_dataset = await Dataset.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_dataset = await Dataset.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -918,7 +918,7 @@ async def test_purge_on_start_enabled(storage_client: StorageClient) -> None: assert default_metadata.name is None assert alias_metadata.name is None - assert named_metadata.name == 'purge_test_named' + assert named_metadata.name == 'purge-test-named' # Clear storage cache to simulate "reopening" storages service_locator.storage_instance_manager.clear_cache() @@ -929,12 +929,12 @@ async def test_purge_on_start_enabled(storage_client: StorageClient) -> None: configuration=configuration, ) alias_dataset_2 = await Dataset.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_dataset_2 = await Dataset.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -973,13 +973,13 @@ async def test_purge_on_start_disabled(storage_client: StorageClient) -> None: ) alias_dataset = await Dataset.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_dataset = await Dataset.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -1004,7 +1004,7 @@ async def test_purge_on_start_disabled(storage_client: StorageClient) -> None: assert default_metadata.name is None assert alias_metadata.name is None - assert named_metadata.name == 'purge_test_named' + assert named_metadata.name == 'purge-test-named' # Clear storage cache to simulate "reopening" storages service_locator.storage_instance_manager.clear_cache() @@ -1015,12 +1015,12 @@ async def test_purge_on_start_disabled(storage_client: StorageClient) -> None: configuration=configuration, ) alias_dataset_2 = await Dataset.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_dataset_2 = await Dataset.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -1053,3 +1053,35 @@ async def test_name_default_not_allowed(storage_client: StorageClient) -> None: f'it is reserved for default alias.', ): await Dataset.open(name=StorageInstanceManager._DEFAULT_STORAGE_ALIAS, storage_client=storage_client) + + +@pytest.mark.parametrize( + ('name', 'is_valid'), + [ + pytest.param('F', True, id='single-char'), + pytest.param('7', True, id='single-digit'), + pytest.param('FtghdfseySds', True, id='mixed-case'), + pytest.param('125673450', True, id='all-digits'), + pytest.param('Ft2134Sfe0O1hf', True, id='mixed-alphanumeric'), + pytest.param('name-with-dashes', True, id='dashes'), + pytest.param('1-value', True, id='number start'), + pytest.param('value-1', True, id='number end'), + pytest.param('test-1-value', True, id='number middle'), + 
pytest.param('test-------value', True, id='multiple-dashes'), + pytest.param('test-VALUES-test', True, id='multiple-cases'), + pytest.param('name_with_underscores', False, id='underscores'), + pytest.param('name with spaces', False, id='spaces'), + pytest.param('-test', False, id='dashes start'), + pytest.param('test-', False, id='dashes end'), + ], +) +async def test_validate_name(storage_client: StorageClient, name: str, *, is_valid: bool) -> None: + """Test name validation logic.""" + if is_valid: + # Should not raise + dataset = await Dataset.open(name=name, storage_client=storage_client) + assert dataset.name == name + await dataset.drop() + else: + with pytest.raises(ValueError, match=rf'Invalid storage name "{name}".*'): + await Dataset.open(name=name, storage_client=storage_client) diff --git a/tests/unit/storages/test_key_value_store.py b/tests/unit/storages/test_key_value_store.py index 21cdf6ad1b..be5ccdcfb6 100644 --- a/tests/unit/storages/test_key_value_store.py +++ b/tests/unit/storages/test_key_value_store.py @@ -37,13 +37,13 @@ async def test_open_creates_new_kvs( ) -> None: """Test that open() creates a new key-value store with proper metadata.""" kvs = await KeyValueStore.open( - name='new_kvs', + name='new-kvs', storage_client=storage_client, ) # Verify key-value store properties assert kvs.id is not None - assert kvs.name == 'new_kvs' + assert kvs.name == 'new-kvs' await kvs.drop() @@ -89,7 +89,7 @@ async def test_open_by_id( """Test opening a key-value store by its ID.""" # First create a key-value store by name kvs1 = await KeyValueStore.open( - name='kvs_by_id_test', + name='kvs-by-id-test', storage_client=storage_client, ) @@ -104,7 +104,7 @@ async def test_open_by_id( # Verify it's the same key-value store assert kvs2.id == kvs1.id - assert kvs2.name == 'kvs_by_id_test' + assert kvs2.name == 'kvs-by-id-test' # Verify the data is still there value = await kvs2.get_value('test_key') @@ -274,7 +274,7 @@ async def test_drop( ) -> None: """Test dropping a key-value store removes it from cache and clears its data.""" kvs = await KeyValueStore.open( - name='drop_test', + name='drop-test', storage_client=storage_client, ) @@ -286,7 +286,7 @@ async def test_drop( # Verify key-value store is empty (by creating a new one with the same name) new_kvs = await KeyValueStore.open( - name='drop_test', + name='drop-test', storage_client=storage_client, ) @@ -414,7 +414,7 @@ async def test_purge( """Test purging a key-value store removes all values but keeps the store itself.""" # First create a key-value store kvs = await KeyValueStore.open( - name='purge_test_kvs', + name='purge-test-kvs', storage_client=storage_client, ) @@ -435,7 +435,7 @@ async def test_purge( # Verify the store still exists but is empty assert kvs.id == kvs_id # Same ID preserved - assert kvs.name == 'purge_test_kvs' # Same name preserved + assert kvs.name == 'purge-test-kvs' # Same name preserved # Store should be empty now keys = await kvs.list_keys() @@ -748,32 +748,32 @@ async def test_named_vs_alias_conflict_detection( ) -> None: """Test that conflicts between named and alias storages are detected.""" # Test 1: Create named storage first, then try alias with same name - named_kvs = await KeyValueStore.open(name='conflict_test', storage_client=storage_client) - assert named_kvs.name == 'conflict_test' + named_kvs = await KeyValueStore.open(name='conflict-test', storage_client=storage_client) + assert named_kvs.name == 'conflict-test' # Try to create alias with same name - should raise error - with 
pytest.raises(ValueError, match=r'Cannot create alias storage "conflict_test".*already exists'): - await KeyValueStore.open(alias='conflict_test', storage_client=storage_client) + with pytest.raises(ValueError, match=r'Cannot create alias storage "conflict-test".*already exists'): + await KeyValueStore.open(alias='conflict-test', storage_client=storage_client) # Clean up await named_kvs.drop() # Test 2: Create alias first, then try named with same name - alias_kvs = await KeyValueStore.open(alias='conflict_test2', storage_client=storage_client) + alias_kvs = await KeyValueStore.open(alias='conflict-test2', storage_client=storage_client) assert alias_kvs.name is None # Alias storages have no name # Try to create named with same name - should raise error - with pytest.raises(ValueError, match=r'Cannot create named storage "conflict_test2".*already exists'): - await KeyValueStore.open(name='conflict_test2', storage_client=storage_client) + with pytest.raises(ValueError, match=r'Cannot create named storage "conflict-test2".*already exists'): + await KeyValueStore.open(name='conflict-test2', storage_client=storage_client) # Clean up await alias_kvs.drop() # Test 3: Different names should work fine - named_kvs_ok = await KeyValueStore.open(name='different_name', storage_client=storage_client) - alias_kvs_ok = await KeyValueStore.open(alias='different_alias', storage_client=storage_client) + named_kvs_ok = await KeyValueStore.open(name='different-name', storage_client=storage_client) + alias_kvs_ok = await KeyValueStore.open(alias='different-alias', storage_client=storage_client) - assert named_kvs_ok.name == 'different_name' + assert named_kvs_ok.name == 'different-name' assert alias_kvs_ok.name is None # Clean up @@ -809,12 +809,12 @@ async def test_alias_vs_named_isolation( """Test that alias and named key-value stores with same identifier are isolated.""" # Create named kvs named_kvs = await KeyValueStore.open( - name='test_identifier', + name='test-identifier', storage_client=storage_client, ) # Verify named kvs - assert named_kvs.name == 'test_identifier' + assert named_kvs.name == 'test-identifier' await named_kvs.set_value('type', 'named') # Clean up named kvs first @@ -910,13 +910,13 @@ async def test_purge_on_start_enabled(storage_client: StorageClient) -> None: ) alias_kvs = await KeyValueStore.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_kvs = await KeyValueStore.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -941,7 +941,7 @@ async def test_purge_on_start_enabled(storage_client: StorageClient) -> None: assert default_metadata.name is None assert alias_metadata.name is None - assert named_metadata.name == 'purge_test_named' + assert named_metadata.name == 'purge-test-named' # Clear storage cache to simulate "reopening" storages service_locator.storage_instance_manager.clear_cache() @@ -952,12 +952,12 @@ async def test_purge_on_start_enabled(storage_client: StorageClient) -> None: configuration=configuration, ) alias_kvs_2 = await KeyValueStore.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_kvs_2 = await KeyValueStore.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -996,13 +996,13 @@ async def test_purge_on_start_disabled(storage_client: StorageClient) -> None: ) 
alias_kvs = await KeyValueStore.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_kvs = await KeyValueStore.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -1029,12 +1029,12 @@ async def test_purge_on_start_disabled(storage_client: StorageClient) -> None: configuration=configuration, ) alias_kvs_2 = await KeyValueStore.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_kvs_2 = await KeyValueStore.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -1063,3 +1063,35 @@ async def test_name_default_not_allowed(storage_client: StorageClient) -> None: f'it is reserved for default alias.', ): await KeyValueStore.open(name=StorageInstanceManager._DEFAULT_STORAGE_ALIAS, storage_client=storage_client) + + +@pytest.mark.parametrize( + ('name', 'is_valid'), + [ + pytest.param('F', True, id='single-char'), + pytest.param('7', True, id='single-digit'), + pytest.param('FtghdfseySds', True, id='mixed-case'), + pytest.param('125673450', True, id='all-digits'), + pytest.param('Ft2134Sfe0O1hf', True, id='mixed-alphanumeric'), + pytest.param('name-with-dashes', True, id='dashes'), + pytest.param('1-value', True, id='number start'), + pytest.param('value-1', True, id='number end'), + pytest.param('test-1-value', True, id='number middle'), + pytest.param('test-------value', True, id='multiple-dashes'), + pytest.param('test-VALUES-test', True, id='multiple-cases'), + pytest.param('name_with_underscores', False, id='underscores'), + pytest.param('name with spaces', False, id='spaces'), + pytest.param('-test', False, id='dashes start'), + pytest.param('test-', False, id='dashes end'), + ], +) +async def test_validate_name(storage_client: StorageClient, name: str, *, is_valid: bool) -> None: + """Test name validation logic.""" + if is_valid: + # Should not raise + dataset = await KeyValueStore.open(name=name, storage_client=storage_client) + assert dataset.name == name + await dataset.drop() + else: + with pytest.raises(ValueError, match=rf'Invalid storage name "{name}".*'): + await KeyValueStore.open(name=name, storage_client=storage_client) diff --git a/tests/unit/storages/test_request_queue.py b/tests/unit/storages/test_request_queue.py index 54dd8f0aab..bd1bcf9c1a 100644 --- a/tests/unit/storages/test_request_queue.py +++ b/tests/unit/storages/test_request_queue.py @@ -38,13 +38,13 @@ async def test_open_creates_new_rq( ) -> None: """Test that open() creates a new request queue with proper metadata.""" rq = await RequestQueue.open( - name='new_request_queue', + name='new-request-queue', storage_client=storage_client, ) # Verify request queue properties assert rq.id is not None - assert rq.name == 'new_request_queue' + assert rq.name == 'new-request-queue' metadata = await rq.get_metadata() assert metadata.pending_request_count == 0 assert metadata.handled_request_count == 0 @@ -94,7 +94,7 @@ async def test_open_by_id( """Test opening a request queue by its ID.""" # First create a request queue by name rq1 = await RequestQueue.open( - name='rq_by_id_test', + name='rq-by-id-test', storage_client=storage_client, ) @@ -109,7 +109,7 @@ async def test_open_by_id( # Verify it's the same request queue assert rq2.id == rq1.id - assert rq2.name == 'rq_by_id_test' + assert rq2.name == 'rq-by-id-test' # Verify the 
request is still there request = await rq2.fetch_next_request() @@ -478,7 +478,7 @@ async def test_drop( ) -> None: """Test dropping a request queue removes it from cache and clears its data.""" rq = await RequestQueue.open( - name='drop_test', + name='drop-test', storage_client=storage_client, ) @@ -490,7 +490,7 @@ async def test_drop( # Verify request queue is empty (by creating a new one with the same name) new_rq = await RequestQueue.open( - name='drop_test', + name='drop-test', storage_client=storage_client, ) @@ -565,7 +565,7 @@ async def test_purge( """Test purging a request queue removes all requests but keeps the queue itself.""" # First create a request queue rq = await RequestQueue.open( - name='purge_test_queue', + name='purge-test-queue', storage_client=storage_client, ) @@ -592,7 +592,7 @@ async def test_purge( # Verify the queue still exists but is empty assert rq.id == queue_id # Same ID preserved - assert rq.name == 'purge_test_queue' # Same name preserved + assert rq.name == 'purge-test-queue' # Same name preserved # Queue should be empty now metadata = await rq.get_metadata() @@ -853,34 +853,34 @@ async def test_named_vs_alias_conflict_detection( """Test that conflicts between named and alias storages are detected.""" # Test 1: Create named storage first, then try alias with same name named_rq = await RequestQueue.open( - name='conflict_test', + name='conflict-test', storage_client=storage_client, ) - assert named_rq.name == 'conflict_test' + assert named_rq.name == 'conflict-test' # Try to create alias with same name - should raise error - with pytest.raises(ValueError, match=r'Cannot create alias storage "conflict_test".*already exists'): - await RequestQueue.open(alias='conflict_test', storage_client=storage_client) + with pytest.raises(ValueError, match=r'Cannot create alias storage "conflict-test".*already exists'): + await RequestQueue.open(alias='conflict-test', storage_client=storage_client) # Clean up await named_rq.drop() # Test 2: Create alias first, then try named with same name - alias_rq = await RequestQueue.open(alias='conflict_test2', storage_client=storage_client) + alias_rq = await RequestQueue.open(alias='conflict-test2', storage_client=storage_client) assert alias_rq.name is None # Alias storages have no name # Try to create named with same name - should raise error - with pytest.raises(ValueError, match=r'Cannot create named storage "conflict_test2".*already exists'): - await RequestQueue.open(name='conflict_test2', storage_client=storage_client) + with pytest.raises(ValueError, match=r'Cannot create named storage "conflict-test2".*already exists'): + await RequestQueue.open(name='conflict-test2', storage_client=storage_client) # Clean up await alias_rq.drop() # Test 3: Different names should work fine - named_rq_ok = await RequestQueue.open(name='different_name') - alias_rq_ok = await RequestQueue.open(alias='different_alias') + named_rq_ok = await RequestQueue.open(name='different-name') + alias_rq_ok = await RequestQueue.open(alias='different-alias') - assert named_rq_ok.name == 'different_name' + assert named_rq_ok.name == 'different-name' assert alias_rq_ok.name is None # Clean up @@ -916,12 +916,12 @@ async def test_alias_vs_named_isolation( """Test that alias and named request queues with same identifier are isolated.""" # Create named request queue named_rq = await RequestQueue.open( - name='test_identifier', + name='test-identifier', storage_client=storage_client, ) # Verify named request queue - assert named_rq.name == 'test_identifier' + 
assert named_rq.name == 'test-identifier' await named_rq.add_request('https://named.example.com') # Clean up named request queue first @@ -929,7 +929,7 @@ async def test_alias_vs_named_isolation( # Now create alias request queue with same identifier (should work after cleanup) alias_rq = await RequestQueue.open( - alias='test_identifier', + alias='test-identifier', storage_client=storage_client, ) @@ -1015,13 +1015,13 @@ async def test_purge_on_start_enabled(storage_client: StorageClient) -> None: ) alias_rq = await RequestQueue.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_rq = await RequestQueue.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -1080,7 +1080,7 @@ async def test_purge_on_start_enabled(storage_client: StorageClient) -> None: # Verify that default and alias storages are unnamed assert default_metadata.name is None assert alias_metadata.name is None - assert named_metadata.name == 'purge_test_named' + assert named_metadata.name == 'purge-test-named' # Clear storage cache to simulate "reopening" storages service_locator.storage_instance_manager.clear_cache() @@ -1091,12 +1091,12 @@ async def test_purge_on_start_enabled(storage_client: StorageClient) -> None: configuration=configuration, ) alias_rq_2 = await RequestQueue.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_rq_2 = await RequestQueue.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -1141,13 +1141,13 @@ async def test_purge_on_start_disabled(storage_client: StorageClient) -> None: ) alias_rq = await RequestQueue.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_rq = await RequestQueue.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -1206,7 +1206,7 @@ async def test_purge_on_start_disabled(storage_client: StorageClient) -> None: # Verify that default and alias storages are unnamed assert default_metadata.name is None assert alias_metadata.name is None - assert named_metadata.name == 'purge_test_named' + assert named_metadata.name == 'purge-test-named' # Clear storage cache to simulate "reopening" storages service_locator.storage_instance_manager.clear_cache() @@ -1217,12 +1217,12 @@ async def test_purge_on_start_disabled(storage_client: StorageClient) -> None: configuration=configuration, ) alias_rq_2 = await RequestQueue.open( - alias='purge_test_alias', + alias='purge-test-alias', storage_client=storage_client, configuration=configuration, ) named_rq_2 = await RequestQueue.open( - name='purge_test_named', + name='purge-test-named', storage_client=storage_client, configuration=configuration, ) @@ -1259,3 +1259,35 @@ async def test_name_default_not_allowed(storage_client: StorageClient) -> None: f'it is reserved for default alias.', ): await RequestQueue.open(name=StorageInstanceManager._DEFAULT_STORAGE_ALIAS, storage_client=storage_client) + + +@pytest.mark.parametrize( + ('name', 'is_valid'), + [ + pytest.param('F', True, id='single-char'), + pytest.param('7', True, id='single-digit'), + pytest.param('FtghdfseySds', True, id='mixed-case'), + pytest.param('125673450', True, id='all-digits'), + pytest.param('Ft2134Sfe0O1hf', True, 
id='mixed-alphanumeric'), + pytest.param('name-with-dashes', True, id='dashes'), + pytest.param('1-value', True, id='number start'), + pytest.param('value-1', True, id='number end'), + pytest.param('test-1-value', True, id='number middle'), + pytest.param('test-------value', True, id='multiple-dashes'), + pytest.param('test-VALUES-test', True, id='multiple-cases'), + pytest.param('name_with_underscores', False, id='underscores'), + pytest.param('name with spaces', False, id='spaces'), + pytest.param('-test', False, id='dashes start'), + pytest.param('test-', False, id='dashes end'), + ], +) +async def test_validate_name(storage_client: StorageClient, name: str, *, is_valid: bool) -> None: + """Test name validation logic.""" + if is_valid: + # Should not raise + dataset = await RequestQueue.open(name=name, storage_client=storage_client) + assert dataset.name == name + await dataset.drop() + else: + with pytest.raises(ValueError, match=rf'Invalid storage name "{name}".*'): + await RequestQueue.open(name=name, storage_client=storage_client) From 40a7227157e4ef68b43d6c69032a9be1f3742c4c Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Mon, 6 Oct 2025 16:21:36 +0000 Subject: [PATCH 2/6] up docs --- src/crawlee/storages/_base.py | 4 +++- src/crawlee/storages/_storage_instance_manager.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/crawlee/storages/_base.py b/src/crawlee/storages/_base.py index c0c4ee100a..e207e5cd9e 100644 --- a/src/crawlee/storages/_base.py +++ b/src/crawlee/storages/_base.py @@ -44,7 +44,9 @@ async def open( Args: id: The storage ID. - name: The storage name (global scope, persists across runs). + name: The storage name (global scope, persists across runs). Name can only contain letters "a" through "z", + the digits "0" through "9", and the hyphen ("-") but only in the middle of the string + (e.g. "my-value-1") alias: The storage alias (run scope, creates unnamed storage). configuration: Configuration object used during the storage creation or restoration process. storage_client: Underlying storage client to use. If not provided, the default global storage client diff --git a/src/crawlee/storages/_storage_instance_manager.py b/src/crawlee/storages/_storage_instance_manager.py index 4088c4102e..7047b7b7ed 100644 --- a/src/crawlee/storages/_storage_instance_manager.py +++ b/src/crawlee/storages/_storage_instance_manager.py @@ -92,7 +92,9 @@ async def open_storage_instance( Args: cls: The storage class to instantiate. id: Storage ID. - name: Storage name. (global scope, persists across runs). + name: Storage name. (global scope, persists across runs). Name can only contain letters "a" through "z", + the digits "0" through "9", and the hyphen ("-") but only in the middle of the string + (e.g. "my-value-1") alias: Storage alias (run scope, creates unnamed storage). client_opener_coro: Coroutine to open the storage client when storage instance not found in cache. storage_client_cache_key: Additional optional key from storage client to differentiate cache entries. 
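
Note on the rule documented above: it comes down to the single regular expression introduced in src/crawlee/storages/_utils.py (patch 1/6). The following standalone sketch, with the pattern copied verbatim from that file, shows which of the names exercised by the new test_validate_name cases are accepted and which are rejected; the loop itself is only illustrative and not part of the patch.

    import re

    NAME_REGEX = re.compile(r'^([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])$')

    # Accepted: single alphanumerics and alphanumeric strings with hyphens strictly inside.
    # Rejected: leading or trailing hyphens, underscores, spaces.
    for candidate in ('F', 'my-value-1', 'test-------value', '-test', 'test-', 'name_with_underscores', 'name with spaces'):
        print(f'{candidate!r}: {bool(NAME_REGEX.match(candidate))}')
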
From 9ab4655426938b67add00e06c2d536c2c188ce3a Mon Sep 17 00:00:00 2001 From: Max Bohomolov <34358312+Mantisus@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:10:14 +0300 Subject: [PATCH 3/6] Update src/crawlee/storages/_base.py Co-authored-by: Vlada Dusek --- src/crawlee/storages/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crawlee/storages/_base.py b/src/crawlee/storages/_base.py index e207e5cd9e..cd17007904 100644 --- a/src/crawlee/storages/_base.py +++ b/src/crawlee/storages/_base.py @@ -46,7 +46,7 @@ async def open( id: The storage ID. name: The storage name (global scope, persists across runs). Name can only contain letters "a" through "z", the digits "0" through "9", and the hyphen ("-") but only in the middle of the string - (e.g. "my-value-1") + (e.g. "my-value-1"). alias: The storage alias (run scope, creates unnamed storage). configuration: Configuration object used during the storage creation or restoration process. storage_client: Underlying storage client to use. If not provided, the default global storage client From ed6de8da9bac60ae5fa5f41680765c0b53793bc4 Mon Sep 17 00:00:00 2001 From: Max Bohomolov <34358312+Mantisus@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:10:21 +0300 Subject: [PATCH 4/6] Update src/crawlee/storages/_storage_instance_manager.py Co-authored-by: Vlada Dusek --- src/crawlee/storages/_storage_instance_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crawlee/storages/_storage_instance_manager.py b/src/crawlee/storages/_storage_instance_manager.py index 7047b7b7ed..f64d6e2c9f 100644 --- a/src/crawlee/storages/_storage_instance_manager.py +++ b/src/crawlee/storages/_storage_instance_manager.py @@ -94,7 +94,7 @@ async def open_storage_instance( id: Storage ID. name: Storage name. (global scope, persists across runs). Name can only contain letters "a" through "z", the digits "0" through "9", and the hyphen ("-") but only in the middle of the string - (e.g. "my-value-1") + (e.g. "my-value-1"). alias: Storage alias (run scope, creates unnamed storage). client_opener_coro: Coroutine to open the storage client when storage instance not found in cache. storage_client_cache_key: Additional optional key from storage client to differentiate cache entries. From 010f98699d5f933a692d28d0a86cb9d4daa8bb21 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Tue, 7 Oct 2025 16:11:22 +0000 Subject: [PATCH 5/6] ap upgrading guides --- docs/upgrading/upgrading_to_v1.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/upgrading/upgrading_to_v1.md b/docs/upgrading/upgrading_to_v1.md index 4f94d54c3d..7870a29c73 100644 --- a/docs/upgrading/upgrading_to_v1.md +++ b/docs/upgrading/upgrading_to_v1.md @@ -333,3 +333,7 @@ async def main() -> None: await crawler.run(['https://crawlee.dev/']) ``` + +### New storage naming restrictions + +We added naming restrictions for storages to align with Apify Platform requirements and avoid potential conflicts. Storage names can only contain letters "a" through "z" (both uppercase and lowercase), digits "0" through "9", and hyphens ("-") but only in the middle of the string (e.g., "my-storage-1"). 
From 90c3108ee138207780f4751f7fedc25c7c3b6a39 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Tue, 7 Oct 2025 19:53:23 +0200 Subject: [PATCH 6/6] wording --- docs/upgrading/upgrading_to_v1.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/upgrading/upgrading_to_v1.md b/docs/upgrading/upgrading_to_v1.md index 7870a29c73..010eb90cc3 100644 --- a/docs/upgrading/upgrading_to_v1.md +++ b/docs/upgrading/upgrading_to_v1.md @@ -336,4 +336,4 @@ async def main() -> None: ### New storage naming restrictions -We added naming restrictions for storages to align with Apify Platform requirements and avoid potential conflicts. Storage names can only contain letters "a" through "z" (both uppercase and lowercase), digits "0" through "9", and hyphens ("-") but only in the middle of the string (e.g., "my-storage-1"). +We've introduced naming restrictions for storages to ensure compatibility with Apify Platform requirements and prevent potential conflicts. Storage names may include only letters (a–z, A–Z), digits (0–9), and hyphens (-), with hyphens allowed only in the middle of the name (for example, my-storage-1).
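
At call sites, the restriction surfaces as a ValueError raised by Dataset.open(), KeyValueStore.open() and RequestQueue.open(). A minimal usage sketch mirroring the new test_validate_name tests, assuming the default storage client; the name 'my_storage_1' is only an illustrative invalid value:

    import asyncio

    from crawlee.storages import Dataset


    async def main() -> None:
        # Valid: letters, digits, and hyphens only in the middle of the name.
        dataset = await Dataset.open(name='my-storage-1')
        print(dataset.name)
        await dataset.drop()

        # Invalid: underscores are not allowed, so open() raises ValueError.
        try:
            await Dataset.open(name='my_storage_1')
        except ValueError as exc:
            print(exc)


    asyncio.run(main())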