diff --git a/pyproject.toml b/pyproject.toml index 85ed9803..5ab96174 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ keywords = [ dependencies = [ "apify-client>=2.0.0,<3.0.0", "apify-shared>=2.0.0,<3.0.0", - "crawlee==1.0.0rc1", + "crawlee @ git+https://github.com/apify/crawlee-python.git@storage-clients-and-configurations", "cachetools>=5.5.0", "cryptography>=42.0.0", # TODO: ensure compatibility with the latest version of lazy-object-proxy diff --git a/src/apify/_actor.py b/src/apify/_actor.py index c9044117..384d966b 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -13,7 +13,7 @@ from apify_client import ApifyClientAsync from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars -from crawlee import service_locator +from crawlee.errors import ServiceConflictError from crawlee.events import ( Event, EventAbortingData, @@ -25,7 +25,7 @@ ) from apify._charging import ChargeResult, ChargingManager, ChargingManagerImplementation -from apify._configuration import Configuration +from apify._configuration import Configuration, service_locator from apify._consts import EVENT_LISTENERS_TIMEOUT from apify._crypto import decrypt_input_secrets, load_private_key from apify._models import ActorRun @@ -119,28 +119,29 @@ def __init__( self._exit_process = self._get_default_exit_process() if exit_process is None else exit_process self._is_exiting = False - self._configuration = configuration or Configuration.get_global_configuration() + # Actor state when this method is being executed is unpredictable. + # Actor can be initialized by lazy object proxy or by user directly, or by both. + # Until `init` method is run, this state of uncertainty remains. This is the reason why any setting done here in + # `__init__` method should not be considered final. 
+ + self._configuration = configuration self._configure_logging = configure_logging self._apify_client = self.new_client() - # Create an instance of the cloud storage client, the local storage client is obtained - # from the service locator. - self._cloud_storage_client = ApifyStorageClient() - # Set the event manager based on whether the Actor is running on the platform or locally. self._event_manager = ( ApifyEventManager( - configuration=self._configuration, - persist_state_interval=self._configuration.persist_state_interval, + configuration=self.config, + persist_state_interval=self.config.persist_state_interval, ) if self.is_at_home() else LocalEventManager( - system_info_interval=self._configuration.system_info_interval, - persist_state_interval=self._configuration.persist_state_interval, + system_info_interval=self.config.system_info_interval, + persist_state_interval=self.config.persist_state_interval, ) ) - self._charging_manager = ChargingManagerImplementation(self._configuration, self._apify_client) + self._charging_manager = ChargingManagerImplementation(self.config, self._apify_client) self._is_initialized = False @@ -203,12 +204,18 @@ def apify_client(self) -> ApifyClientAsync: @property def configuration(self) -> Configuration: """The Configuration instance the Actor instance uses.""" - return self._configuration + return self.config @property def config(self) -> Configuration: """The Configuration instance the Actor instance uses.""" - return self._configuration + if self._configuration: + return self._configuration + self.log.debug( + 'Implicit configuration used.' + "It's recommended to explicitly set the configuration to avoid unexpected behavior." + ) + return Configuration() @property def event_manager(self) -> EventManager: @@ -250,12 +257,31 @@ async def init(self) -> None: This method should be called immediately before performing any additional Actor actions, and it should be called only once. 
""" + if self._configuration: + # Set explicitly the configuration in the service locator + service_locator.set_configuration(self.configuration) + else: + try: + # Set implicit default Apify configuration, unless configuration was already set. + service_locator.set_configuration(self.configuration) + except ServiceConflictError: + self.log.info( + 'Configuration in service locator was set explicitly before Actor. ' + 'Using the existing configuration.' + ) + # Use the configuration from the service locator + self._configuration = service_locator.get_configuration() + if self._is_initialized: raise RuntimeError('The Actor was already initialized!') if _ActorType._is_any_instance_initialized: self.log.warning('Repeated Actor initialization detected - this is non-standard usage, proceed with care') + # Create an instance of the cloud storage client, the local storage client is obtained + # from the service locator + self._cloud_storage_client = ApifyStorageClient(configuration=self.configuration) + # Make sure that the currently initialized instance is also available through the global `Actor` proxy cast('Proxy', Actor).__wrapped__ = self @@ -267,7 +293,6 @@ async def init(self) -> None: service_locator.set_storage_client(self._cloud_storage_client) service_locator.set_event_manager(self.event_manager) - service_locator.set_configuration(self.configuration) # The logging configuration has to be called after all service_locator set methods. if self._configure_logging: @@ -385,8 +410,8 @@ def new_client( (increases exponentially from this value). timeout: The socket timeout of the HTTP requests sent to the Apify API. 
""" - token = token or self._configuration.token - api_url = api_url or self._configuration.api_base_url + token = token or self.config.token + api_url = api_url or self.config.api_base_url return ApifyClientAsync( token=token, api_url=api_url, @@ -429,7 +454,6 @@ async def open_dataset( return await Dataset.open( id=id, name=name, - configuration=self._configuration, storage_client=storage_client, ) @@ -463,7 +487,6 @@ async def open_key_value_store( return await KeyValueStore.open( id=id, name=name, - configuration=self._configuration, storage_client=storage_client, ) @@ -500,7 +523,6 @@ async def open_request_queue( return await RequestQueue.open( id=id, name=name, - configuration=self._configuration, storage_client=storage_client, ) @@ -549,9 +571,9 @@ async def get_input(self) -> Any: """Get the Actor input value from the default key-value store associated with the current Actor run.""" self._raise_if_not_initialized() - input_value = await self.get_value(self._configuration.input_key) - input_secrets_private_key = self._configuration.input_secrets_private_key_file - input_secrets_key_passphrase = self._configuration.input_secrets_private_key_passphrase + input_value = await self.get_value(self.config.input_key) + input_secrets_private_key = self.config.input_secrets_private_key_file + input_secrets_key_passphrase = self.config.input_secrets_private_key_passphrase if input_secrets_private_key and input_secrets_key_passphrase: private_key = load_private_key( input_secrets_private_key, @@ -688,7 +710,7 @@ def off(self, event_name: Event, listener: Callable | None = None) -> None: def is_at_home(self) -> bool: """Return `True` when the Actor is running on the Apify platform, and `False` otherwise (e.g. local run).""" - return self._configuration.is_at_home + return self.config.is_at_home def get_env(self) -> dict: """Return a dictionary with information parsed from all the `APIFY_XXX` environment variables. 
@@ -714,7 +736,7 @@ def get_env(self) -> dict: aliases = [field_name] for alias in aliases: - config[alias] = getattr(self._configuration, field_name) + config[alias] = getattr(self.config, field_name) env_vars = {env_var.value.lower(): env_var.name.lower() for env_var in [*ActorEnvVars, *ApifyEnvVars]} return {option_name: config[env_var] for env_var, option_name in env_vars.items() if env_var in config} @@ -1002,13 +1024,13 @@ async def metamorph( return if not custom_after_sleep: - custom_after_sleep = self._configuration.metamorph_after_sleep + custom_after_sleep = self.config.metamorph_after_sleep # If is_at_home() is True, config.actor_run_id is always set - if not self._configuration.actor_run_id: + if not self.config.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') - await self._apify_client.run(self._configuration.actor_run_id).metamorph( + await self._apify_client.run(self.config.actor_run_id).metamorph( target_actor_id=target_actor_id, run_input=run_input, target_actor_build=target_actor_build, @@ -1045,7 +1067,7 @@ async def reboot( _ActorType._is_rebooting = True if not custom_after_sleep: - custom_after_sleep = self._configuration.metamorph_after_sleep + custom_after_sleep = self.config.metamorph_after_sleep # Call all the listeners for the PERSIST_STATE and MIGRATING events, and wait for them to finish. # PERSIST_STATE listeners are called to allow the Actor to persist its state before the reboot. 
@@ -1065,10 +1087,10 @@ async def reboot( *[listener(EventMigratingData()) for listener in migrating_listeners], ) - if not self._configuration.actor_run_id: + if not self.config.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') - await self._apify_client.run(self._configuration.actor_run_id).reboot() + await self._apify_client.run(self.config.actor_run_id).reboot() if custom_after_sleep: await asyncio.sleep(custom_after_sleep.total_seconds()) @@ -1107,11 +1129,11 @@ async def add_webhook( return # If is_at_home() is True, config.actor_run_id is always set - if not self._configuration.actor_run_id: + if not self.config.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') await self._apify_client.webhooks().create( - actor_run_id=self._configuration.actor_run_id, + actor_run_id=self.config.actor_run_id, event_types=webhook.event_types, request_url=webhook.request_url, payload_template=webhook.payload_template, @@ -1143,10 +1165,10 @@ async def set_status_message( return None # If is_at_home() is True, config.actor_run_id is always set - if not self._configuration.actor_run_id: + if not self.config.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') - api_result = await self._apify_client.run(self._configuration.actor_run_id).update( + api_result = await self._apify_client.run(self.config.actor_run_id).update( status_message=status_message, is_status_message_terminal=is_terminal ) @@ -1201,7 +1223,7 @@ async def create_proxy_configuration( country_code=country_code, proxy_urls=proxy_urls, new_url_function=new_url_function, - _actor_config=self._configuration, + _actor_config=self.config, _apify_client=self._apify_client, ) diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index f7e4f028..069d3060 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -1,5 +1,6 @@ from __future__ 
import annotations +import traceback from datetime import datetime, timedelta from decimal import Decimal from logging import getLogger @@ -8,6 +9,8 @@ from pydantic import AliasChoices, BeforeValidator, Field, model_validator from typing_extensions import Self, deprecated +import crawlee +from crawlee._service_locator import ServiceLocator from crawlee._utils.models import timedelta_ms from crawlee._utils.urls import validate_http_url from crawlee.configuration import Configuration as CrawleeConfiguration @@ -417,6 +420,7 @@ def disable_browser_sandbox_on_platform(self) -> Self: """ if self.is_at_home and not self.disable_browser_sandbox: self.disable_browser_sandbox = True + logger.info('Stack trace:\n%s', ''.join(traceback.format_stack())) logger.warning('Actor is running on the Apify platform, `disable_browser_sandbox` was changed to True.') return self @@ -427,8 +431,33 @@ def get_global_configuration(cls) -> Configuration: Mostly for the backwards compatibility. It is recommended to use the `service_locator.get_configuration()` instead. """ - return cls() + return service_locator.get_configuration() -# Monkey-patch the base class so that it works with the extended configuration -CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration # type: ignore[method-assign] +class ApifyServiceLocator(ServiceLocator): + """Same as ServiceLocator from Crawlee, but it always returns Apify Configuration.""" + + def get_configuration(self) -> Configuration: + # ApifyServiceLocator can store any children of Crawlee Configuration, but in Apify context it is desired to + # return Apify Configuration. + if isinstance(self._configuration, Configuration): + # If Apify configuration was already stored in service locator, return it. + return self._configuration + + stored_configuration = super().get_configuration() + apify_configuration = Configuration() + + # Ensure the returned configuration is of type Apify Configuration. 
+ # Most likely crawlee configuration was already set. Create Apify configuration from it. + # Due to known Pydantic issue https://github.com/pydantic/pydantic/issues/9516, creating new instance of + # Configuration from existing one in situation where environment can have some fields set by alias is very + # unpredictable. Use the stable workaround. + for name in stored_configuration.model_fields: + setattr(apify_configuration, name, getattr(stored_configuration, name)) + + return apify_configuration + + +# Ensure that ApifyServiceLocator is used to make sure Apify Configuration is used. +service_locator = ApifyServiceLocator() +crawlee.service_locator = service_locator diff --git a/src/apify/scrapy/extensions/_httpcache.py b/src/apify/scrapy/extensions/_httpcache.py index 14d8753d..678c93ed 100644 --- a/src/apify/scrapy/extensions/_httpcache.py +++ b/src/apify/scrapy/extensions/_httpcache.py @@ -56,7 +56,6 @@ async def open_kvs() -> KeyValueStore: storage_client = ApifyStorageClient() return await KeyValueStore.open( name=kvs_name, - configuration=configuration, storage_client=storage_client, ) return await KeyValueStore.open(name=kvs_name) diff --git a/src/apify/scrapy/scheduler.py b/src/apify/scrapy/scheduler.py index 2dcacd9a..95a4375e 100644 --- a/src/apify/scrapy/scheduler.py +++ b/src/apify/scrapy/scheduler.py @@ -53,7 +53,6 @@ async def open_rq() -> RequestQueue: if configuration.is_at_home: storage_client = ApifyStorageClient() return await RequestQueue.open( - configuration=configuration, storage_client=storage_client, ) return await RequestQueue.open() diff --git a/src/apify/storage_clients/_apify/_storage_client.py b/src/apify/storage_clients/_apify/_storage_client.py index 689e2c77..bcdc5e33 100644 --- a/src/apify/storage_clients/_apify/_storage_client.py +++ b/src/apify/storage_clients/_apify/_storage_client.py @@ -9,6 +9,7 @@ from ._dataset_client import ApifyDatasetClient from ._key_value_store_client import ApifyKeyValueStoreClient from 
._request_queue_client import ApifyRequestQueueClient +from apify._configuration import service_locator from apify._utils import docs_group if TYPE_CHECKING: @@ -19,6 +20,15 @@ class ApifyStorageClient(StorageClient): """Apify storage client.""" + def __init__(self, configuration: Configuration | None = None) -> None: + """Initialize the Apify storage client. + + Args: + configuration: Optional configuration instance to use with the storage client. + If not provided, the global configuration will be used. + """ + self._configuration = configuration or service_locator.get_configuration() + @override async def create_dataset_client( self, @@ -30,13 +40,12 @@ # Import here to avoid circular imports. from apify import Configuration as ApifyConfiguration # noqa: PLC0415 - configuration = configuration or ApifyConfiguration.get_global_configuration() - if isinstance(configuration, ApifyConfiguration): - return await ApifyDatasetClient.open(id=id, name=name, configuration=configuration) + if isinstance(self._configuration, ApifyConfiguration): + return await ApifyDatasetClient.open(id=id, name=name, configuration=self._configuration) raise TypeError( f'Expected "configuration" to be an instance of "apify.Configuration", ' - f'but got {type(configuration).__name__} instead.' + f'but got {type(self._configuration).__name__} instead.' ) @override @@ -50,13 +59,12 @@ async def create_kvs_client( # Import here to avoid circular imports. 
from apify import Configuration as ApifyConfiguration # noqa: PLC0415 - configuration = configuration or ApifyConfiguration.get_global_configuration() - if isinstance(configuration, ApifyConfiguration): - return await ApifyKeyValueStoreClient.open(id=id, name=name, configuration=configuration) + if isinstance(self._configuration, ApifyConfiguration): + return await ApifyKeyValueStoreClient.open(id=id, name=name, configuration=self._configuration) raise TypeError( f'Expected "configuration" to be an instance of "apify.Configuration", ' - f'but got {type(configuration).__name__} instead.' + f'but got {type(self._configuration).__name__} instead.' ) @override @@ -70,11 +78,15 @@ async def create_rq_client( # Import here to avoid circular imports. from apify import Configuration as ApifyConfiguration # noqa: PLC0415 - configuration = configuration or ApifyConfiguration.get_global_configuration() - if isinstance(configuration, ApifyConfiguration): - return await ApifyRequestQueueClient.open(id=id, name=name, configuration=configuration) + if isinstance(self._configuration, ApifyConfiguration): + return await ApifyRequestQueueClient.open(id=id, name=name, configuration=self._configuration) raise TypeError( f'Expected "configuration" to be an instance of "apify.Configuration", ' - f'but got {type(configuration).__name__} instead.' + f'but got {type(self._configuration).__name__} instead.' 
) + + @override + def create_client(self, configuration: Configuration) -> ApifyStorageClient: + """Create a storage client from an existing storage client potentially just replacing the configuration.""" + return ApifyStorageClient(configuration) diff --git a/tests/integration/actor_source_base/requirements.txt b/tests/integration/actor_source_base/requirements.txt index f7ff2350..a72f6fb3 100644 --- a/tests/integration/actor_source_base/requirements.txt +++ b/tests/integration/actor_source_base/requirements.txt @@ -1,4 +1,4 @@ # The test fixture will put the Apify SDK wheel path on the next line APIFY_SDK_WHEEL_PLACEHOLDER uvicorn[standard] -crawlee[parsel]==1.0.0rc1 +crawlee[parsel] @ git+https://github.com/apify/crawlee-python.git@storage-clients-and-configurations diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 9c230acd..518c3ee2 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -60,16 +60,8 @@ def _prepare_test_env() -> None: service_locator._storage_client = None service_locator._storage_instance_manager = None - # Reset the retrieval flags. - service_locator._configuration_was_retrieved = False - service_locator._event_manager_was_retrieved = False - service_locator._storage_client_was_retrieved = False - # Verify that the test environment was set up correctly. 
assert os.environ.get(ApifyEnvVars.LOCAL_STORAGE_DIR) == str(tmp_path) - assert service_locator._configuration_was_retrieved is False - assert service_locator._storage_client_was_retrieved is False - assert service_locator._event_manager_was_retrieved is False return _prepare_test_env diff --git a/tests/unit/actor/test_configuration.py b/tests/unit/actor/test_configuration.py index 95f19f12..4124561b 100644 --- a/tests/unit/actor/test_configuration.py +++ b/tests/unit/actor/test_configuration.py @@ -1,6 +1,12 @@ import pytest -from apify import Configuration +from crawlee.configuration import Configuration as CrawleeConfiguration +from crawlee.crawlers import BasicCrawler +from crawlee.errors import ServiceConflictError + +from apify import Actor +from apify import Configuration as ApifyConfiguration +from apify._configuration import service_locator @pytest.mark.parametrize( @@ -16,6 +22,140 @@ def test_disable_browser_sandbox( *, is_at_home: bool, disable_browser_sandbox_in: bool, disable_browser_sandbox_out: bool ) -> None: assert ( - Configuration(is_at_home=is_at_home, disable_browser_sandbox=disable_browser_sandbox_in).disable_browser_sandbox + ApifyConfiguration( + is_at_home=is_at_home, disable_browser_sandbox=disable_browser_sandbox_in + ).disable_browser_sandbox == disable_browser_sandbox_out ) + + +def test_apify_configuration_is_always_used() -> None: + """Set Crawlee Configuration in service_locator and verify that Apify Configuration is returned.""" + max_used_cpu_ratio = 0.123456 # Some unique value to verify configuration + service_locator.set_configuration(CrawleeConfiguration(max_used_cpu_ratio=max_used_cpu_ratio)) + + returned_config = service_locator.get_configuration() + assert returned_config.max_used_cpu_ratio == max_used_cpu_ratio + assert isinstance(returned_config, ApifyConfiguration) + + +async def test_existing_apify_config_respected_by_actor() -> None: + """Set Apify Configuration in service_locator and verify that Actor respects it.""" 
+ max_used_cpu_ratio = 0.123456 # Some unique value to verify configuration + apify_config = ApifyConfiguration(max_used_cpu_ratio=max_used_cpu_ratio) + service_locator.set_configuration(apify_config) + async with Actor: + pass + + returned_config = service_locator.get_configuration() + assert returned_config is apify_config + + + async def test_existing_crawlee_config_respected_by_actor() -> None: + """Set Crawlee Configuration in service_locator and verify that Actor respects it.""" + max_used_cpu_ratio = 0.123456 # Some unique value to verify configuration + crawlee_config = CrawleeConfiguration(max_used_cpu_ratio=max_used_cpu_ratio) + service_locator.set_configuration(crawlee_config) + async with Actor: + pass + + returned_config = service_locator.get_configuration() + assert returned_config is not crawlee_config + assert isinstance(returned_config, ApifyConfiguration) + # Make sure the Crawlee Configuration was used to create returned Apify Configuration + assert returned_config.max_used_cpu_ratio == max_used_cpu_ratio + + + async def test_existing_apify_config_throws_error_when_set_in_actor() -> None: + """Test that passing explicit configuration to actor after service locator configuration was already set, + raises exception.""" + service_locator.set_configuration(ApifyConfiguration()) + with pytest.raises(ServiceConflictError): + async with Actor(configuration=ApifyConfiguration()): + pass + + + async def test_setting_config_after_actor_raises_exception() -> None: + """Test that setting configuration in service locator after the Actor was created raises an exception.""" + async with Actor(): + with pytest.raises(ServiceConflictError): + service_locator.set_configuration(ApifyConfiguration()) + + + async def test_actor_using_input_configuration() -> None: + """Test that the Actor uses the configuration that was explicitly passed to it.""" + apify_config = ApifyConfiguration() + async with Actor(configuration=apify_config): + pass + + 
assert service_locator.get_configuration() is apify_config + + +async def test_crawler_implicit_configuration_through_actor() -> None: + """Test that crawler uses Actor configuration unless explicit configuration was passed to it.""" + apify_config = ApifyConfiguration() + async with Actor(configuration=apify_config): + crawler = BasicCrawler() + + assert crawler._service_locator.get_configuration() is apify_config + assert service_locator.get_configuration() is apify_config + + +async def test_crawler_implicit_configuration() -> None: + """Test that crawler and Actor use implicit service_locator based configuration unless explicit configuration + was passed to them.""" + async with Actor(): + crawler_1 = BasicCrawler() + + assert service_locator.get_configuration() is crawler_1._service_locator.get_configuration() + + +async def test_crawlers_own_configuration() -> None: + """Test that crawlers can use own configurations without crashing.""" + config_actor = ApifyConfiguration() + apify_crawler_1 = ApifyConfiguration() + apify_crawler_2 = ApifyConfiguration() + + async with Actor(configuration=config_actor): + crawler_1 = BasicCrawler(configuration=apify_crawler_1) + crawler_2 = BasicCrawler(configuration=apify_crawler_2) + + assert service_locator.get_configuration() is config_actor + assert crawler_1._service_locator.get_configuration() is apify_crawler_1 + assert crawler_2._service_locator.get_configuration() is apify_crawler_2 + + +async def test_crawler_global_configuration() -> None: + """Test that crawler and Actor use explicit service_locator based configuration unless explicit configuration + was passed to them.""" + config_global = ApifyConfiguration() + service_locator.set_configuration(config_global) + + async with Actor(): + crawler_1 = BasicCrawler() + + assert service_locator.get_configuration() is config_global + assert crawler_1._service_locator.get_configuration() is config_global + + +async def 
test_storage_retrieved_is_different_with_different_config() -> None: + """Test that retrieving storage depends on used configuration.""" + config_actor = ApifyConfiguration() + apify_crawler_1 = ApifyConfiguration() + + async with Actor(configuration=config_actor): + actor_kvs = await Actor.open_key_value_store() + crawler_1 = BasicCrawler(configuration=apify_crawler_1) + crawler_kvs = await crawler_1.get_key_value_store() + + assert actor_kvs is not crawler_kvs + + +async def test_storage_retrieved_is_same_with_same_config() -> None: + """Test that retrieving storage is same if same configuration is used.""" + async with Actor(): + actor_kvs = await Actor.open_key_value_store() + crawler_1 = BasicCrawler() + crawler_kvs = await crawler_1.get_key_value_store() + + assert actor_kvs is crawler_kvs diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 02b8868e..2bbb4545 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -13,9 +13,9 @@ from apify_client import ApifyClientAsync from apify_shared.consts import ApifyEnvVars -from crawlee import service_locator import apify._actor +from apify._configuration import service_locator if TYPE_CHECKING: from collections.abc import Callable, Iterator @@ -51,16 +51,8 @@ def _prepare_test_env() -> None: service_locator._storage_client = None service_locator._storage_instance_manager = None - # Reset the retrieval flags. - service_locator._configuration_was_retrieved = False - service_locator._event_manager_was_retrieved = False - service_locator._storage_client_was_retrieved = False - # Verify that the test environment was set up correctly. 
assert os.environ.get(ApifyEnvVars.LOCAL_STORAGE_DIR) == str(tmp_path) - assert service_locator._configuration_was_retrieved is False - assert service_locator._storage_client_was_retrieved is False - assert service_locator._event_manager_was_retrieved is False return _prepare_test_env diff --git a/uv.lock b/uv.lock index 0df8ba86..7f87ef0e 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10" [[package]] @@ -76,7 +76,7 @@ requires-dist = [ { name = "apify-client", specifier = ">=2.0.0,<3.0.0" }, { name = "apify-shared", specifier = ">=2.0.0,<3.0.0" }, { name = "cachetools", specifier = ">=5.5.0" }, - { name = "crawlee", specifier = "==1.0.0rc1" }, + { name = "crawlee", git = "https://github.com/apify/crawlee-python.git?rev=storage-clients-and-configurations" }, { name = "cryptography", specifier = ">=42.0.0" }, { name = "impit", specifier = ">=0.5.3" }, { name = "lazy-object-proxy", specifier = "<1.11.0" }, @@ -477,8 +477,8 @@ toml = [ [[package]] name = "crawlee" -version = "1.0.0rc1" -source = { registry = "https://pypi.org/simple" } +version = "0.6.13" +source = { git = "https://github.com/apify/crawlee-python.git?rev=storage-clients-and-configurations#430f2ad7e7d266ad35abe97b64a9709d81aab7f1" } dependencies = [ { name = "cachetools" }, { name = "colorama" }, @@ -493,10 +493,6 @@ dependencies = [ { name = "typing-extensions" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1e/1d/31d7710b54c78d12cdc359f8f30478714768cfdab669f4464a8632bb5db6/crawlee-1.0.0rc1.tar.gz", hash = "sha256:bf644826a030fb01c1c525d7da1a73f4ce3fb89671eca9544aa0fccc5e9eaaa6", size = 24822393, upload-time = "2025-08-22T06:46:29.831Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/68/fcb616bd86782c1445ad8b6d3b5ec40ce970adcda755deb5cc9347ba9fb0/crawlee-1.0.0rc1-py3-none-any.whl", hash = "sha256:748e54aea1884b2cc49e4cebbfb1842159dd2b93ae17284cd947fa8a066d137f", size = 274346, 
upload-time = "2025-08-22T06:46:27.035Z" }, -] [package.optional-dependencies] parsel = [ @@ -1593,16 +1589,15 @@ wheels = [ [[package]] name = "pydantic-settings" -version = "2.10.1" +version = "2.6.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "python-dotenv" }, - { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/68/85/1ea668bbab3c50071ca613c6ab30047fb36ab0da1b92fa8f17bbc38fd36c/pydantic_settings-2.10.1.tar.gz", hash = "sha256:06f0062169818d0f5524420a360d632d5857b83cffd4d42fe29597807a1614ee", size = 172583, upload-time = "2025-06-24T13:26:46.841Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/d4/9dfbe238f45ad8b168f5c96ee49a3df0598ce18a0795a983b419949ce65b/pydantic_settings-2.6.1.tar.gz", hash = "sha256:e0f92546d8a9923cb8941689abf85d6601a8c19a23e97a34b2964a2e3f813ca0", size = 75646, upload-time = "2024-11-01T11:00:05.17Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235, upload-time = "2025-06-24T13:26:45.485Z" }, + { url = "https://files.pythonhosted.org/packages/5e/f9/ff95fd7d760af42f647ea87f9b8a383d891cdb5e5dbd4613edaeb094252a/pydantic_settings-2.6.1-py3-none-any.whl", hash = "sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87", size = 28595, upload-time = "2024-11-01T11:00:02.64Z" }, ] [[package]]