diff --git a/tests/integration/test_actor_api_helpers.py b/tests/integration/test_actor_api_helpers.py index 8073e292..7e88251a 100644 --- a/tests/integration/test_actor_api_helpers.py +++ b/tests/integration/test_actor_api_helpers.py @@ -15,438 +15,417 @@ from .conftest import ActorFactory -class TestActorIsAtHome: - async def test_actor_is_at_home(self: TestActorIsAtHome, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - assert Actor.is_at_home() is True +async def test_actor_reports_running_on_platform(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + assert Actor.is_at_home() is True - actor = await make_actor('is-at-home', main_func=main) + actor = await make_actor('is-at-home', main_func=main) - run_result = await actor.call() + run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' -class TestActorGetEnv: - async def test_actor_get_env(self: TestActorGetEnv, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - env_dict = Actor.get_env() +async def test_actor_retrieves_env_vars(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + env_dict = Actor.get_env() - assert env_dict.get('is_at_home') is True - assert env_dict.get('token') is not None - assert env_dict.get('actor_events_ws_url') is not None - assert env_dict.get('input_key') is not None + assert env_dict.get('is_at_home') is True + assert env_dict.get('token') is not None + assert env_dict.get('actor_events_ws_url') is not None + assert env_dict.get('input_key') is not None - assert len(env_dict.get('actor_id', '')) == 17 - assert len(env_dict.get('actor_run_id', '')) == 17 - assert len(env_dict.get('user_id', '')) == 17 - assert len(env_dict.get('default_dataset_id', '')) == 17 - assert len(env_dict.get('default_key_value_store_id', '')) == 
17 - assert len(env_dict.get('default_request_queue_id', '')) == 17 + assert len(env_dict.get('actor_id', '')) == 17 + assert len(env_dict.get('actor_run_id', '')) == 17 + assert len(env_dict.get('user_id', '')) == 17 + assert len(env_dict.get('default_dataset_id', '')) == 17 + assert len(env_dict.get('default_key_value_store_id', '')) == 17 + assert len(env_dict.get('default_request_queue_id', '')) == 17 - actor = await make_actor('get-env', main_func=main) + actor = await make_actor('get-env', main_func=main) - run_result = await actor.call() + run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' -class TestActorNewClient: - async def test_actor_new_client(self: TestActorNewClient, make_actor: ActorFactory) -> None: - async def main() -> None: - import os +async def test_actor_creates_new_client_instance(make_actor: ActorFactory) -> None: + async def main() -> None: + import os - from apify_shared.consts import ActorEnvVars + from apify_shared.consts import ActorEnvVars - async with Actor: - new_client = Actor.new_client() - assert new_client is not Actor.apify_client + async with Actor: + new_client = Actor.new_client() + assert new_client is not Actor.apify_client - default_key_value_store_id = os.getenv(ActorEnvVars.DEFAULT_KEY_VALUE_STORE_ID) - assert default_key_value_store_id is not None - kv_store_client = new_client.key_value_store(default_key_value_store_id) - await kv_store_client.set_record('OUTPUT', 'TESTING-OUTPUT') + default_key_value_store_id = os.getenv(ActorEnvVars.DEFAULT_KEY_VALUE_STORE_ID) + assert default_key_value_store_id is not None + kv_store_client = new_client.key_value_store(default_key_value_store_id) + await kv_store_client.set_record('OUTPUT', 'TESTING-OUTPUT') - actor = await make_actor('new-client', main_func=main) + actor = await make_actor('new-client', main_func=main) - run_result = await 
actor.call() + run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' - output_record = await actor.last_run().key_value_store().get_record('OUTPUT') - assert output_record is not None - assert output_record['value'] == 'TESTING-OUTPUT' + output_record = await actor.last_run().key_value_store().get_record('OUTPUT') + assert output_record is not None + assert output_record['value'] == 'TESTING-OUTPUT' -class TestActorSetStatusMessage: - async def test_actor_set_status_message(self: TestActorSetStatusMessage, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - actor_input = await Actor.get_input() or {} - await Actor.set_status_message('testing-status-message', **actor_input) +async def test_actor_sets_status_message(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + actor_input = await Actor.get_input() or {} + await Actor.set_status_message('testing-status-message', **actor_input) - actor = await make_actor('set-status-message', main_func=main) + actor = await make_actor('set-status-message', main_func=main) - run_result = await actor.call() + run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - assert run_result['statusMessage'] == 'testing-status-message' - assert run_result['isStatusMessageTerminal'] is None + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + assert run_result['statusMessage'] == 'testing-status-message' + assert run_result['isStatusMessageTerminal'] is None - run_result = await actor.call(run_input={'is_terminal': True}) + run_result = await actor.call(run_input={'is_terminal': True}) - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - assert run_result['statusMessage'] == 'testing-status-message' - assert 
run_result['isStatusMessageTerminal'] is True + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + assert run_result['statusMessage'] == 'testing-status-message' + assert run_result['isStatusMessageTerminal'] is True -class TestActorStart: - async def test_actor_start(self: TestActorStart, make_actor: ActorFactory) -> None: - async def main_inner() -> None: - async with Actor: - await asyncio.sleep(5) - actor_input = await Actor.get_input() or {} - test_value = actor_input.get('test_value') - await Actor.set_value('OUTPUT', f'{test_value}_XXX_{test_value}') +async def test_actor_starts_another_actor_instance(make_actor: ActorFactory) -> None: + async def main_inner() -> None: + async with Actor: + await asyncio.sleep(5) + actor_input = await Actor.get_input() or {} + test_value = actor_input.get('test_value') + await Actor.set_value('OUTPUT', f'{test_value}_XXX_{test_value}') - async def main_outer() -> None: - async with Actor: - actor_input = await Actor.get_input() or {} - inner_actor_id = actor_input.get('inner_actor_id') - test_value = actor_input.get('test_value') + async def main_outer() -> None: + async with Actor: + actor_input = await Actor.get_input() or {} + inner_actor_id = actor_input.get('inner_actor_id') + test_value = actor_input.get('test_value') - assert inner_actor_id is not None + assert inner_actor_id is not None - await Actor.start(inner_actor_id, run_input={'test_value': test_value}) + await Actor.start(inner_actor_id, run_input={'test_value': test_value}) - inner_run_status = await Actor.apify_client.actor(inner_actor_id).last_run().get() - assert inner_run_status is not None - assert inner_run_status.get('status') in ['READY', 'RUNNING'] + inner_run_status = await Actor.apify_client.actor(inner_actor_id).last_run().get() + assert inner_run_status is not None + assert inner_run_status.get('status') in ['READY', 'RUNNING'] - inner_actor = await make_actor('start-inner', main_func=main_inner) - outer_actor = await 
make_actor('start-outer', main_func=main_outer) + inner_actor = await make_actor('start-inner', main_func=main_inner) + outer_actor = await make_actor('start-outer', main_func=main_outer) - inner_actor_id = (await inner_actor.get() or {})['id'] - test_value = crypto_random_object_id() + inner_actor_id = (await inner_actor.get() or {})['id'] + test_value = crypto_random_object_id() - outer_run_result = await outer_actor.call( - run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id} - ) + outer_run_result = await outer_actor.call(run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id}) - assert outer_run_result is not None - assert outer_run_result['status'] == 'SUCCEEDED' + assert outer_run_result is not None + assert outer_run_result['status'] == 'SUCCEEDED' - await inner_actor.last_run().wait_for_finish() + await inner_actor.last_run().wait_for_finish() - inner_output_record = await inner_actor.last_run().key_value_store().get_record('OUTPUT') - assert inner_output_record is not None - assert inner_output_record['value'] == f'{test_value}_XXX_{test_value}' + inner_output_record = await inner_actor.last_run().key_value_store().get_record('OUTPUT') + assert inner_output_record is not None + assert inner_output_record['value'] == f'{test_value}_XXX_{test_value}' -class TestActorCall: - async def test_actor_call(self: TestActorCall, make_actor: ActorFactory) -> None: - async def main_inner() -> None: - async with Actor: - await asyncio.sleep(5) - actor_input = await Actor.get_input() or {} - test_value = actor_input.get('test_value') - await Actor.set_value('OUTPUT', f'{test_value}_XXX_{test_value}') +async def test_actor_calls_another_actor(make_actor: ActorFactory) -> None: + async def main_inner() -> None: + async with Actor: + await asyncio.sleep(5) + actor_input = await Actor.get_input() or {} + test_value = actor_input.get('test_value') + await Actor.set_value('OUTPUT', f'{test_value}_XXX_{test_value}') - async def main_outer() -> 
None: - async with Actor: - actor_input = await Actor.get_input() or {} - inner_actor_id = actor_input.get('inner_actor_id') - test_value = actor_input.get('test_value') + async def main_outer() -> None: + async with Actor: + actor_input = await Actor.get_input() or {} + inner_actor_id = actor_input.get('inner_actor_id') + test_value = actor_input.get('test_value') - assert inner_actor_id is not None + assert inner_actor_id is not None - await Actor.call(inner_actor_id, run_input={'test_value': test_value}) + await Actor.call(inner_actor_id, run_input={'test_value': test_value}) - inner_run_status = await Actor.apify_client.actor(inner_actor_id).last_run().get() - assert inner_run_status is not None - assert inner_run_status.get('status') == 'SUCCEEDED' + inner_run_status = await Actor.apify_client.actor(inner_actor_id).last_run().get() + assert inner_run_status is not None + assert inner_run_status.get('status') == 'SUCCEEDED' - inner_actor = await make_actor('call-inner', main_func=main_inner) - outer_actor = await make_actor('call-outer', main_func=main_outer) + inner_actor = await make_actor('call-inner', main_func=main_inner) + outer_actor = await make_actor('call-outer', main_func=main_outer) - inner_actor_id = (await inner_actor.get() or {})['id'] - test_value = crypto_random_object_id() + inner_actor_id = (await inner_actor.get() or {})['id'] + test_value = crypto_random_object_id() - outer_run_result = await outer_actor.call( - run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id} - ) + outer_run_result = await outer_actor.call(run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id}) - assert outer_run_result is not None - assert outer_run_result['status'] == 'SUCCEEDED' + assert outer_run_result is not None + assert outer_run_result['status'] == 'SUCCEEDED' - await inner_actor.last_run().wait_for_finish() + await inner_actor.last_run().wait_for_finish() - inner_output_record = await 
inner_actor.last_run().key_value_store().get_record('OUTPUT') - assert inner_output_record is not None - assert inner_output_record['value'] == f'{test_value}_XXX_{test_value}' + inner_output_record = await inner_actor.last_run().key_value_store().get_record('OUTPUT') + assert inner_output_record is not None + assert inner_output_record['value'] == f'{test_value}_XXX_{test_value}' -class TestActorCallTask: - async def test_actor_call_task( - self: TestActorCallTask, - make_actor: ActorFactory, - apify_client_async: ApifyClientAsync, - ) -> None: - async def main_inner() -> None: - async with Actor: - await asyncio.sleep(5) - actor_input = await Actor.get_input() or {} - test_value = actor_input.get('test_value') - await Actor.set_value('OUTPUT', f'{test_value}_XXX_{test_value}') +async def test_actor_calls_task(make_actor: ActorFactory, apify_client_async: ApifyClientAsync) -> None: + async def main_inner() -> None: + async with Actor: + await asyncio.sleep(5) + actor_input = await Actor.get_input() or {} + test_value = actor_input.get('test_value') + await Actor.set_value('OUTPUT', f'{test_value}_XXX_{test_value}') - async def main_outer() -> None: - async with Actor: - actor_input = await Actor.get_input() or {} - inner_task_id = actor_input.get('inner_task_id') + async def main_outer() -> None: + async with Actor: + actor_input = await Actor.get_input() or {} + inner_task_id = actor_input.get('inner_task_id') - assert inner_task_id is not None + assert inner_task_id is not None - await Actor.call_task(inner_task_id) + await Actor.call_task(inner_task_id) - inner_run_status = await Actor.apify_client.task(inner_task_id).last_run().get() - assert inner_run_status is not None - assert inner_run_status.get('status') == 'SUCCEEDED' + inner_run_status = await Actor.apify_client.task(inner_task_id).last_run().get() + assert inner_run_status is not None + assert inner_run_status.get('status') == 'SUCCEEDED' - inner_actor = await make_actor('call-task-inner', 
main_func=main_inner) - outer_actor = await make_actor('call-task-outer', main_func=main_outer) + inner_actor = await make_actor('call-task-inner', main_func=main_inner) + outer_actor = await make_actor('call-task-outer', main_func=main_outer) - inner_actor_id = (await inner_actor.get() or {})['id'] - test_value = crypto_random_object_id() + inner_actor_id = (await inner_actor.get() or {})['id'] + test_value = crypto_random_object_id() - task = await apify_client_async.tasks().create( - actor_id=inner_actor_id, - name=generate_unique_resource_name('actor-call-task'), - task_input={'test_value': test_value}, - ) + task = await apify_client_async.tasks().create( + actor_id=inner_actor_id, + name=generate_unique_resource_name('actor-call-task'), + task_input={'test_value': test_value}, + ) - outer_run_result = await outer_actor.call(run_input={'test_value': test_value, 'inner_task_id': task['id']}) + outer_run_result = await outer_actor.call(run_input={'test_value': test_value, 'inner_task_id': task['id']}) - assert outer_run_result is not None - assert outer_run_result['status'] == 'SUCCEEDED' + assert outer_run_result is not None + assert outer_run_result['status'] == 'SUCCEEDED' - await inner_actor.last_run().wait_for_finish() + await inner_actor.last_run().wait_for_finish() - inner_output_record = await inner_actor.last_run().key_value_store().get_record('OUTPUT') - assert inner_output_record is not None - assert inner_output_record['value'] == f'{test_value}_XXX_{test_value}' + inner_output_record = await inner_actor.last_run().key_value_store().get_record('OUTPUT') + assert inner_output_record is not None + assert inner_output_record['value'] == f'{test_value}_XXX_{test_value}' - await apify_client_async.task(task['id']).delete() + await apify_client_async.task(task['id']).delete() -class TestActorAbort: - async def test_actor_abort(self: TestActorAbort, make_actor: ActorFactory) -> None: - async def main_inner() -> None: - async with Actor: - await 
asyncio.sleep(180) - # This should not be set, the actor should be aborted by now - await Actor.set_value('OUTPUT', 'dummy') +async def test_actor_aborts_another_actor_run(make_actor: ActorFactory) -> None: + async def main_inner() -> None: + async with Actor: + await asyncio.sleep(180) + # This should not be set, the actor should be aborted by now + await Actor.set_value('OUTPUT', 'dummy') - async def main_outer() -> None: - async with Actor: - actor_input = await Actor.get_input() or {} - inner_run_id = actor_input.get('inner_run_id') + async def main_outer() -> None: + async with Actor: + actor_input = await Actor.get_input() or {} + inner_run_id = actor_input.get('inner_run_id') - assert inner_run_id is not None + assert inner_run_id is not None - await Actor.abort(inner_run_id) + await Actor.abort(inner_run_id) - inner_actor = await make_actor('abort-inner', main_func=main_inner) - outer_actor = await make_actor('abort-outer', main_func=main_outer) + inner_actor = await make_actor('abort-inner', main_func=main_inner) + outer_actor = await make_actor('abort-outer', main_func=main_outer) - inner_run_id = (await inner_actor.start())['id'] + inner_run_id = (await inner_actor.start())['id'] - outer_run_result = await outer_actor.call(run_input={'inner_run_id': inner_run_id}) + outer_run_result = await outer_actor.call(run_input={'inner_run_id': inner_run_id}) - assert outer_run_result is not None - assert outer_run_result['status'] == 'SUCCEEDED' + assert outer_run_result is not None + assert outer_run_result['status'] == 'SUCCEEDED' - await inner_actor.last_run().wait_for_finish() - inner_actor_last_run = await inner_actor.last_run().get() - assert inner_actor_last_run is not None - assert inner_actor_last_run['status'] == 'ABORTED' + await inner_actor.last_run().wait_for_finish() + inner_actor_last_run = await inner_actor.last_run().get() + assert inner_actor_last_run is not None + assert inner_actor_last_run['status'] == 'ABORTED' - inner_output_record = await 
inner_actor.last_run().key_value_store().get_record('OUTPUT') - assert inner_output_record is None + inner_output_record = await inner_actor.last_run().key_value_store().get_record('OUTPUT') + assert inner_output_record is None -class TestActorMetamorph: - async def test_actor_metamorph(self: TestActorMetamorph, make_actor: ActorFactory) -> None: - async def main_inner() -> None: - import os +async def test_actor_metamorphs_into_another_actor(make_actor: ActorFactory) -> None: + async def main_inner() -> None: + import os - from apify_shared.consts import ActorEnvVars + from apify_shared.consts import ActorEnvVars - async with Actor: - assert os.getenv(ActorEnvVars.INPUT_KEY) is not None - assert os.getenv(ActorEnvVars.INPUT_KEY) != 'INPUT' - actor_input = await Actor.get_input() or {} + async with Actor: + assert os.getenv(ActorEnvVars.INPUT_KEY) is not None + assert os.getenv(ActorEnvVars.INPUT_KEY) != 'INPUT' + actor_input = await Actor.get_input() or {} - test_value = actor_input.get('test_value', '') - assert test_value.endswith('_BEFORE_METAMORPH') + test_value = actor_input.get('test_value', '') + assert test_value.endswith('_BEFORE_METAMORPH') - output = test_value.replace('_BEFORE_METAMORPH', '_AFTER_METAMORPH') - await Actor.set_value('OUTPUT', output) + output = test_value.replace('_BEFORE_METAMORPH', '_AFTER_METAMORPH') + await Actor.set_value('OUTPUT', output) - async def main_outer() -> None: - async with Actor: - actor_input = await Actor.get_input() or {} - inner_actor_id = actor_input.get('inner_actor_id') - test_value = actor_input.get('test_value') - new_test_value = f'{test_value}_BEFORE_METAMORPH' + async def main_outer() -> None: + async with Actor: + actor_input = await Actor.get_input() or {} + inner_actor_id = actor_input.get('inner_actor_id') + test_value = actor_input.get('test_value') + new_test_value = f'{test_value}_BEFORE_METAMORPH' - assert inner_actor_id is not None + assert inner_actor_id is not None - await 
Actor.metamorph(inner_actor_id, run_input={'test_value': new_test_value}) + await Actor.metamorph(inner_actor_id, run_input={'test_value': new_test_value}) - # This should not be called - await Actor.set_value('RECORD_AFTER_METAMORPH_CALL', 'dummy') - raise AssertionError('The Actor should have been metamorphed by now') + # This should not be called + await Actor.set_value('RECORD_AFTER_METAMORPH_CALL', 'dummy') + raise AssertionError('The Actor should have been metamorphed by now') - inner_actor = await make_actor('metamorph-inner', main_func=main_inner) - outer_actor = await make_actor('metamorph-outer', main_func=main_outer) + inner_actor = await make_actor('metamorph-inner', main_func=main_inner) + outer_actor = await make_actor('metamorph-outer', main_func=main_outer) - inner_actor_id = (await inner_actor.get() or {})['id'] - test_value = crypto_random_object_id() + inner_actor_id = (await inner_actor.get() or {})['id'] + test_value = crypto_random_object_id() - outer_run_result = await outer_actor.call( - run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id} - ) + outer_run_result = await outer_actor.call(run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id}) - assert outer_run_result is not None - assert outer_run_result['status'] == 'SUCCEEDED' + assert outer_run_result is not None + assert outer_run_result['status'] == 'SUCCEEDED' - outer_run_key_value_store = outer_actor.last_run().key_value_store() + outer_run_key_value_store = outer_actor.last_run().key_value_store() - outer_output_record = await outer_run_key_value_store.get_record('OUTPUT') - assert outer_output_record is not None - assert outer_output_record['value'] == f'{test_value}_AFTER_METAMORPH' + outer_output_record = await outer_run_key_value_store.get_record('OUTPUT') + assert outer_output_record is not None + assert outer_output_record['value'] == f'{test_value}_AFTER_METAMORPH' - assert await 
outer_run_key_value_store.get_record('RECORD_AFTER_METAMORPH_CALL') is None + assert await outer_run_key_value_store.get_record('RECORD_AFTER_METAMORPH_CALL') is None - # After metamorph, the run still belongs to the original Actor, so the inner one should have no runs - assert await inner_actor.last_run().get() is None + # After metamorph, the run still belongs to the original Actor, so the inner one should have no runs + assert await inner_actor.last_run().get() is None -class TestActorReboot: - async def test_actor_reboot(self: TestActorReboot, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - print('Starting...') - cnt = await Actor.get_value('reboot_counter', 0) +async def test_actor_reboots_successfully(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + print('Starting...') + cnt = await Actor.get_value('reboot_counter', 0) - if cnt < 2: - print(f'Rebooting (cnt = {cnt})...') - await Actor.set_value('reboot_counter', cnt + 1) - await Actor.reboot() - await Actor.set_value('THIS_KEY_SHOULD_NOT_BE_WRITTEN', 'XXX') + if cnt < 2: + print(f'Rebooting (cnt = {cnt})...') + await Actor.set_value('reboot_counter', cnt + 1) + await Actor.reboot() + await Actor.set_value('THIS_KEY_SHOULD_NOT_BE_WRITTEN', 'XXX') - print('Finishing...') + print('Finishing...') - actor = await make_actor('actor_rebooter', main_func=main) - run_result = await actor.call(run_input={'counter_key': 'reboot_counter'}) + actor = await make_actor('actor_rebooter', main_func=main) + run_result = await actor.call(run_input={'counter_key': 'reboot_counter'}) - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' - not_written_value = await actor.last_run().key_value_store().get_record('THIS_KEY_SHOULD_NOT_BE_WRITTEN') - assert not_written_value is None + not_written_value = await 
actor.last_run().key_value_store().get_record('THIS_KEY_SHOULD_NOT_BE_WRITTEN') + assert not_written_value is None - reboot_counter = await actor.last_run().key_value_store().get_record('reboot_counter') - assert reboot_counter is not None - assert reboot_counter['value'] == 2 + reboot_counter = await actor.last_run().key_value_store().get_record('reboot_counter') + assert reboot_counter is not None + assert reboot_counter['value'] == 2 -class TestActorAddWebhook: - async def test_actor_add_webhook(self: TestActorAddWebhook, make_actor: ActorFactory) -> None: - async def main_server() -> None: - import os - from http.server import BaseHTTPRequestHandler, HTTPServer +async def test_actor_adds_webhook_and_receives_event(make_actor: ActorFactory) -> None: + async def main_server() -> None: + import os + from http.server import BaseHTTPRequestHandler, HTTPServer - from apify_shared.consts import ActorEnvVars + from apify_shared.consts import ActorEnvVars - webhook_body = '' + webhook_body = '' - async with Actor: + async with Actor: - class WebhookHandler(BaseHTTPRequestHandler): - def do_GET(self) -> None: # noqa: N802 - self.send_response(200) - self.end_headers() - self.wfile.write(bytes('Hello, world!', encoding='utf-8')) + class WebhookHandler(BaseHTTPRequestHandler): + def do_GET(self) -> None: # noqa: N802 + self.send_response(200) + self.end_headers() + self.wfile.write(bytes('Hello, world!', encoding='utf-8')) - def do_POST(self) -> None: # noqa: N802 - nonlocal webhook_body - content_length = self.headers.get('content-length') - length = int(content_length) if content_length else 0 + def do_POST(self) -> None: # noqa: N802 + nonlocal webhook_body + content_length = self.headers.get('content-length') + length = int(content_length) if content_length else 0 - webhook_body = self.rfile.read(length).decode('utf-8') + webhook_body = self.rfile.read(length).decode('utf-8') - self.send_response(200) - self.end_headers() - self.wfile.write(bytes('Hello, world!', 
encoding='utf-8')) + self.send_response(200) + self.end_headers() + self.wfile.write(bytes('Hello, world!', encoding='utf-8')) - container_port = int(os.getenv(ActorEnvVars.WEB_SERVER_PORT, '')) - with HTTPServer(('', container_port), WebhookHandler) as server: - await Actor.set_value('INITIALIZED', value=True) - while not webhook_body: - server.handle_request() + container_port = int(os.getenv(ActorEnvVars.WEB_SERVER_PORT, '')) + with HTTPServer(('', container_port), WebhookHandler) as server: + await Actor.set_value('INITIALIZED', value=True) + while not webhook_body: + server.handle_request() - await Actor.set_value('WEBHOOK_BODY', webhook_body) + await Actor.set_value('WEBHOOK_BODY', webhook_body) - async def main_client() -> None: - from apify import Webhook, WebhookEventType + async def main_client() -> None: + from apify import Webhook, WebhookEventType - async with Actor: - actor_input = await Actor.get_input() or {} - server_actor_container_url = str(actor_input.get('server_actor_container_url')) + async with Actor: + actor_input = await Actor.get_input() or {} + server_actor_container_url = str(actor_input.get('server_actor_container_url')) - await Actor.add_webhook( - Webhook( - event_types=[WebhookEventType.ACTOR_RUN_SUCCEEDED], - request_url=server_actor_container_url, - ) + await Actor.add_webhook( + Webhook( + event_types=[WebhookEventType.ACTOR_RUN_SUCCEEDED], + request_url=server_actor_container_url, ) + ) - server_actor, client_actor = await asyncio.gather( - make_actor('add-webhook-server', main_func=main_server), - make_actor('add-webhook-client', main_func=main_client), - ) + server_actor, client_actor = await asyncio.gather( + make_actor('add-webhook-server', main_func=main_server), + make_actor('add-webhook-client', main_func=main_client), + ) - server_actor_run = await server_actor.start() - server_actor_container_url = server_actor_run['containerUrl'] + server_actor_run = await server_actor.start() + server_actor_container_url = 
server_actor_run['containerUrl'] - # Give the server actor some time to start running + # Give the server actor some time to start running + server_actor_initialized = await server_actor.last_run().key_value_store().get_record('INITIALIZED') + while not server_actor_initialized: server_actor_initialized = await server_actor.last_run().key_value_store().get_record('INITIALIZED') - while not server_actor_initialized: - server_actor_initialized = await server_actor.last_run().key_value_store().get_record('INITIALIZED') - await asyncio.sleep(1) - - client_actor_run_result = await client_actor.call( - run_input={'server_actor_container_url': server_actor_container_url} - ) - assert client_actor_run_result is not None - assert client_actor_run_result['status'] == 'SUCCEEDED' - - server_actor_run_result = await server_actor.last_run().wait_for_finish() - assert server_actor_run_result is not None - assert server_actor_run_result['status'] == 'SUCCEEDED' - - webhook_body_record = await server_actor.last_run().key_value_store().get_record('WEBHOOK_BODY') - assert webhook_body_record is not None - assert webhook_body_record['value'] != '' - parsed_webhook_body = json.loads(webhook_body_record['value']) - - assert parsed_webhook_body['eventData']['actorId'] == client_actor_run_result['actId'] - assert parsed_webhook_body['eventData']['actorRunId'] == client_actor_run_result['id'] + await asyncio.sleep(1) + + client_actor_run_result = await client_actor.call( + run_input={'server_actor_container_url': server_actor_container_url} + ) + assert client_actor_run_result is not None + assert client_actor_run_result['status'] == 'SUCCEEDED' + + server_actor_run_result = await server_actor.last_run().wait_for_finish() + assert server_actor_run_result is not None + assert server_actor_run_result['status'] == 'SUCCEEDED' + + webhook_body_record = await server_actor.last_run().key_value_store().get_record('WEBHOOK_BODY') + assert webhook_body_record is not None + assert 
webhook_body_record['value'] != '' + parsed_webhook_body = json.loads(webhook_body_record['value']) + + assert parsed_webhook_body['eventData']['actorId'] == client_actor_run_result['actId'] + assert parsed_webhook_body['eventData']['actorRunId'] == client_actor_run_result['id'] diff --git a/tests/integration/test_actor_create_proxy_configuration.py b/tests/integration/test_actor_create_proxy_configuration.py index bd841a58..f9737a7a 100644 --- a/tests/integration/test_actor_create_proxy_configuration.py +++ b/tests/integration/test_actor_create_proxy_configuration.py @@ -8,68 +8,62 @@ from .conftest import ActorFactory -class TestActorCreateProxyConfiguration: - async def test_create_proxy_configuration_basic( - self: TestActorCreateProxyConfiguration, - make_actor: ActorFactory, - ) -> None: - async def main() -> None: - groups = ['SHADER'] - country_code = 'US' - - async with Actor: - proxy_configuration = await Actor.create_proxy_configuration( - groups=groups, - country_code=country_code, - ) - - assert proxy_configuration is not None - assert proxy_configuration._groups == groups - assert proxy_configuration._password is not None - assert proxy_configuration._country_code == country_code - - actor = await make_actor('proxy-configuration', main_func=main) - - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - - async def test_create_proxy_configuration_complex( - self: TestActorCreateProxyConfiguration, - make_actor: ActorFactory, - ) -> None: - async def main() -> None: - await Actor.init() - - proxy_url_suffix = f'{Actor.config.proxy_password}@{Actor.config.proxy_hostname}:{Actor.config.proxy_port}' +async def test_create_basic_proxy_configuration(make_actor: ActorFactory) -> None: + async def main() -> None: + groups = ['SHADER'] + country_code = 'US' + async with Actor: proxy_configuration = await Actor.create_proxy_configuration( - actor_proxy_input={ - 'useApifyProxy': True, - } + groups=groups, 
+ country_code=country_code, ) - assert proxy_configuration is not None - assert await proxy_configuration.new_url() == f'http://auto:{proxy_url_suffix}' - groups = ['SHADER', 'BUYPROXIES94952'] - country_code = 'US' - proxy_configuration = await Actor.create_proxy_configuration( - actor_proxy_input={ - 'useApifyProxy': True, - 'apifyProxyGroups': groups, - 'apifyProxyCountry': country_code, - } - ) assert proxy_configuration is not None - assert ( - await proxy_configuration.new_url() - == f'http://groups-{"+".join(groups)},country-{country_code}:{proxy_url_suffix}' - ) - - await Actor.exit() - - actor = await make_actor('proxy-configuration', main_func=main) - - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' + assert proxy_configuration._groups == groups + assert proxy_configuration._password is not None + assert proxy_configuration._country_code == country_code + + actor = await make_actor('proxy-configuration', main_func=main) + + run_result = await actor.call() + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + + +async def test_create_proxy_configuration_with_groups_and_country(make_actor: ActorFactory) -> None: + async def main() -> None: + await Actor.init() + + proxy_url_suffix = f'{Actor.config.proxy_password}@{Actor.config.proxy_hostname}:{Actor.config.proxy_port}' + + proxy_configuration = await Actor.create_proxy_configuration( + actor_proxy_input={ + 'useApifyProxy': True, + } + ) + assert proxy_configuration is not None + assert await proxy_configuration.new_url() == f'http://auto:{proxy_url_suffix}' + + groups = ['SHADER', 'BUYPROXIES94952'] + country_code = 'US' + proxy_configuration = await Actor.create_proxy_configuration( + actor_proxy_input={ + 'useApifyProxy': True, + 'apifyProxyGroups': groups, + 'apifyProxyCountry': country_code, + } + ) + assert proxy_configuration is not None + assert ( + await proxy_configuration.new_url() + == 
f'http://groups-{"+".join(groups)},country-{country_code}:{proxy_url_suffix}' + ) + + await Actor.exit() + + actor = await make_actor('proxy-configuration', main_func=main) + + run_result = await actor.call() + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' diff --git a/tests/integration/test_actor_dataset.py b/tests/integration/test_actor_dataset.py index e61446db..d4eeb0fa 100644 --- a/tests/integration/test_actor_dataset.py +++ b/tests/integration/test_actor_dataset.py @@ -15,104 +15,101 @@ from .conftest import ActorFactory -class TestActorPushData: - async def test_push_data(self: TestActorPushData, make_actor: ActorFactory) -> None: - desired_item_count = 100 # Also change inside main() if you're changing this +async def test_push_and_verify_data_in_default_dataset(make_actor: ActorFactory) -> None: + desired_item_count = 100 # Also change inside main() if you're changing this - async def main() -> None: - desired_item_count = 100 - async with Actor: - await Actor.push_data([{'id': i} for i in range(desired_item_count)]) + async def main() -> None: + desired_item_count = 100 + async with Actor: + await Actor.push_data([{'id': i} for i in range(desired_item_count)]) + + actor = await make_actor('push-data', main_func=main) + + run_result = await actor.call() + + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + list_page = await actor.last_run().dataset().list_items() + assert list_page.items[0]['id'] == 0 + assert list_page.items[-1]['id'] == desired_item_count - 1 + assert len(list_page.items) == list_page.count == desired_item_count - actor = await make_actor('push-data', main_func=main) - run_result = await actor.call() +async def test_push_large_data_chunks_over_9mb(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + await Actor.push_data([{'str': 'x' * 10000} for _ in range(5000)]) # ~50MB - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - 
list_page = await actor.last_run().dataset().list_items() - assert list_page.items[0]['id'] == 0 - assert list_page.items[-1]['id'] == desired_item_count - 1 - assert len(list_page.items) == list_page.count == desired_item_count + actor = await make_actor('push-data-over-9mb', main_func=main) - async def test_push_data_over_9mb(self: TestActorPushData, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - await Actor.push_data([{'str': 'x' * 10000} for _ in range(5000)]) # ~50MB + run_result = await actor.call() - actor = await make_actor('push-data-over-9mb', main_func=main) + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + async for item in actor.last_run().dataset().iterate_items(): + assert item['str'] == 'x' * 10000 - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - async for item in actor.last_run().dataset().iterate_items(): - assert item['str'] == 'x' * 10000 +async def test_same_references_in_default_dataset(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + dataset1 = await Actor.open_dataset() + dataset2 = await Actor.open_dataset() + assert dataset1 is dataset2 + actor = await make_actor('dataset-same-ref-default', main_func=main) -class TestActorOpenDataset: - async def test_same_references_default(self: TestActorOpenDataset, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - dataset1 = await Actor.open_dataset() - dataset2 = await Actor.open_dataset() - assert dataset1 is dataset2 + run_result = await actor.call() + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' - actor = await make_actor('dataset-same-ref-default', main_func=main) - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' +async def test_same_references_in_named_dataset(make_actor: ActorFactory) -> None: + dataset_name 
= generate_unique_resource_name('dataset') - async def test_same_references_named(self: TestActorOpenDataset, make_actor: ActorFactory) -> None: - dataset_name = generate_unique_resource_name('dataset') + async def main() -> None: + async with Actor: + input_object = await Actor.get_input() + dataset_name = input_object['datasetName'] + dataset_by_name_1 = await Actor.open_dataset(name=dataset_name) + dataset_by_name_2 = await Actor.open_dataset(name=dataset_name) + assert dataset_by_name_1 is dataset_by_name_2 - async def main() -> None: - async with Actor: - input_object = await Actor.get_input() - dataset_name = input_object['datasetName'] - dataset_by_name_1 = await Actor.open_dataset(name=dataset_name) - dataset_by_name_2 = await Actor.open_dataset(name=dataset_name) - assert dataset_by_name_1 is dataset_by_name_2 + dataset_by_id_1 = await Actor.open_dataset(id=dataset_by_name_1._id) + dataset_by_id_2 = await Actor.open_dataset(id=dataset_by_name_1._id) + assert dataset_by_id_1 is dataset_by_name_1 + assert dataset_by_id_2 is dataset_by_id_1 - dataset_by_id_1 = await Actor.open_dataset(id=dataset_by_name_1._id) - dataset_by_id_2 = await Actor.open_dataset(id=dataset_by_name_1._id) - assert dataset_by_id_1 is dataset_by_name_1 - assert dataset_by_id_2 is dataset_by_id_1 + await dataset_by_name_1.drop() - await dataset_by_name_1.drop() + actor = await make_actor('dataset-same-ref-named', main_func=main) - actor = await make_actor('dataset-same-ref-named', main_func=main) + run_result = await actor.call(run_input={'datasetName': dataset_name}) + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' - run_result = await actor.call(run_input={'datasetName': dataset_name}) - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - async def test_force_cloud( - self: TestActorOpenDataset, - apify_client_async: ApifyClientAsync, - monkeypatch: pytest.MonkeyPatch, - ) -> None: - assert apify_client_async.token is not None - 
monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_client_async.token) +async def test_force_cloud(apify_client_async: ApifyClientAsync, monkeypatch: pytest.MonkeyPatch) -> None: + assert apify_client_async.token is not None + monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_client_async.token) - dataset_name = generate_unique_resource_name('dataset') - dataset_item = {'foo': 'bar'} + dataset_name = generate_unique_resource_name('dataset') + dataset_item = {'foo': 'bar'} - async with Actor: - dataset = await Actor.open_dataset(name=dataset_name, force_cloud=True) - dataset_id = dataset._id + async with Actor: + dataset = await Actor.open_dataset(name=dataset_name, force_cloud=True) + dataset_id = dataset._id - await dataset.push_data(dataset_item) + await dataset.push_data(dataset_item) - dataset_client = apify_client_async.dataset(dataset_id) + dataset_client = apify_client_async.dataset(dataset_id) - try: - dataset_details = await dataset_client.get() - assert dataset_details is not None - assert dataset_details.get('name') == dataset_name + try: + dataset_details = await dataset_client.get() + assert dataset_details is not None + assert dataset_details.get('name') == dataset_name - dataset_items = await dataset_client.list_items() - assert dataset_items.items == [dataset_item] - finally: - await dataset_client.delete() + dataset_items = await dataset_client.list_items() + assert dataset_items.items == [dataset_item] + finally: + await dataset_client.delete() diff --git a/tests/integration/test_actor_events.py b/tests/integration/test_actor_events.py index 01b45bbe..7509a71d 100644 --- a/tests/integration/test_actor_events.py +++ b/tests/integration/test_actor_events.py @@ -11,92 +11,92 @@ from .conftest import ActorFactory -class TestActorEvents: - async def test_interval_events(self: TestActorEvents, make_actor: ActorFactory) -> None: - async def main() -> None: - import os - from datetime import datetime - from typing import Any, Callable - - from apify_shared.consts 
import ActorEventTypes, ApifyEnvVars - from crawlee.events._types import Event, EventSystemInfoData - - os.environ[ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS] = '900' - - was_system_info_emitted = False - system_infos = list[EventSystemInfoData]() - - def on_event(event_type: ActorEventTypes) -> Callable: - async def log_event(data: Any) -> None: - nonlocal was_system_info_emitted - nonlocal system_infos - print(f'Got actor event ({event_type=}, {data=})') - await Actor.push_data({'event_type': event_type, 'data': data}) - if event_type == ActorEventTypes.SYSTEM_INFO: - was_system_info_emitted = True - system_infos.append(data) - - return log_event - - async with Actor: - Actor.on(Event.SYSTEM_INFO, on_event(ActorEventTypes.SYSTEM_INFO)) - Actor.on(Event.PERSIST_STATE, on_event(ActorEventTypes.PERSIST_STATE)) - await asyncio.sleep(3) - - # The SYSTEM_INFO event sometimes takes a while to appear, let's wait for it for a while longer - for _ in range(20): - if was_system_info_emitted: - break - await asyncio.sleep(1) - - # Check that parsing datetimes works correctly - # Check `createdAt` is a datetime (so it's the same locally and on platform) - assert isinstance(system_infos[0].cpu_info.created_at, datetime) - - actor = await make_actor('actor-interval-events', main_func=main) - - run_result = await actor.call() - - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - dataset_items_page = await actor.last_run().dataset().list_items() - persist_state_events = [ - item for item in dataset_items_page.items if item['event_type'] == ActorEventTypes.PERSIST_STATE - ] - system_info_events = [ - item for item in dataset_items_page.items if item['event_type'] == ActorEventTypes.SYSTEM_INFO - ] - assert len(persist_state_events) > 2 - assert len(system_info_events) > 0 - - async def test_off_event(self: TestActorEvents, make_actor: ActorFactory) -> None: - async def main() -> None: - import os - - from apify_shared.consts import ApifyEnvVars - from 
crawlee.events._types import Event - - os.environ[ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS] = '100' - - counter = 0 - - def count_event(data): # type: ignore # noqa: ANN202, ANN001 - nonlocal counter - print(data) - counter += 1 - - async with Actor: - Actor.on(Event.PERSIST_STATE, count_event) - await asyncio.sleep(0.5) - assert counter > 1 - last_count = counter - Actor.off(Event.PERSIST_STATE, count_event) - await asyncio.sleep(0.5) - assert counter == last_count - - actor = await make_actor('actor-off-event', main_func=main) - - run = await actor.call() - - assert run is not None - assert run['status'] == 'SUCCEEDED' +async def test_emit_and_capture_interval_events(make_actor: ActorFactory) -> None: + async def main() -> None: + import os + from datetime import datetime + from typing import Any, Callable + + from apify_shared.consts import ActorEventTypes, ApifyEnvVars + from crawlee.events._types import Event, EventSystemInfoData + + os.environ[ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS] = '900' + + was_system_info_emitted = False + system_infos = list[EventSystemInfoData]() + + def on_event(event_type: ActorEventTypes) -> Callable: + async def log_event(data: Any) -> None: + nonlocal was_system_info_emitted + nonlocal system_infos + print(f'Got actor event ({event_type=}, {data=})') + await Actor.push_data({'event_type': event_type, 'data': data}) + if event_type == ActorEventTypes.SYSTEM_INFO: + was_system_info_emitted = True + system_infos.append(data) + + return log_event + + async with Actor: + Actor.on(Event.SYSTEM_INFO, on_event(ActorEventTypes.SYSTEM_INFO)) + Actor.on(Event.PERSIST_STATE, on_event(ActorEventTypes.PERSIST_STATE)) + await asyncio.sleep(3) + + # The SYSTEM_INFO event sometimes takes a while to appear, let's wait for it for a while longer + for _ in range(20): + if was_system_info_emitted: + break + await asyncio.sleep(1) + + # Check that parsing datetimes works correctly + # Check `createdAt` is a datetime (so it's the same locally and 
on platform) + assert isinstance(system_infos[0].cpu_info.created_at, datetime) + + actor = await make_actor('actor-interval-events', main_func=main) + + run_result = await actor.call() + + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + dataset_items_page = await actor.last_run().dataset().list_items() + persist_state_events = [ + item for item in dataset_items_page.items if item['event_type'] == ActorEventTypes.PERSIST_STATE + ] + system_info_events = [ + item for item in dataset_items_page.items if item['event_type'] == ActorEventTypes.SYSTEM_INFO + ] + assert len(persist_state_events) > 2 + assert len(system_info_events) > 0 + + +async def test_event_listener_can_be_removed_successfully(make_actor: ActorFactory) -> None: + async def main() -> None: + import os + + from apify_shared.consts import ApifyEnvVars + from crawlee.events._types import Event + + os.environ[ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS] = '100' + + counter = 0 + + def count_event(data): # type: ignore # noqa: ANN202, ANN001 + nonlocal counter + print(data) + counter += 1 + + async with Actor: + Actor.on(Event.PERSIST_STATE, count_event) + await asyncio.sleep(0.5) + assert counter > 1 + last_count = counter + Actor.off(Event.PERSIST_STATE, count_event) + await asyncio.sleep(0.5) + assert counter == last_count + + actor = await make_actor('actor-off-event', main_func=main) + + run = await actor.call() + + assert run is not None + assert run['status'] == 'SUCCEEDED' diff --git a/tests/integration/test_actor_key_value_store.py b/tests/integration/test_actor_key_value_store.py index 6a27ef40..98cac610 100644 --- a/tests/integration/test_actor_key_value_store.py +++ b/tests/integration/test_actor_key_value_store.py @@ -15,194 +15,189 @@ from .conftest import ActorFactory -class TestActorOpenKeyValueStore: - async def test_same_references_default(self: TestActorOpenKeyValueStore, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - kvs1 
= await Actor.open_key_value_store() - kvs2 = await Actor.open_key_value_store() - assert kvs1 is kvs2 - - actor = await make_actor('kvs-same-ref-default', main_func=main) - - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - - async def test_same_references_named(self: TestActorOpenKeyValueStore, make_actor: ActorFactory) -> None: - kvs_name = generate_unique_resource_name('key-value-store') - - async def main() -> None: - async with Actor: - input_object = await Actor.get_input() - kvs_name = input_object['kvsName'] - kvs_by_name_1 = await Actor.open_key_value_store(name=kvs_name) - kvs_by_name_2 = await Actor.open_key_value_store(name=kvs_name) - assert kvs_by_name_1 is kvs_by_name_2 - - kvs_by_id_1 = await Actor.open_key_value_store(id=kvs_by_name_1._id) - kvs_by_id_2 = await Actor.open_key_value_store(id=kvs_by_name_1._id) - assert kvs_by_id_1 is kvs_by_name_1 - assert kvs_by_id_2 is kvs_by_id_1 - - await kvs_by_name_1.drop() - - actor = await make_actor('kvs-same-ref-named', main_func=main) - - run_result = await actor.call(run_input={'kvsName': kvs_name}) - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - - async def test_force_cloud( - self: TestActorOpenKeyValueStore, - apify_client_async: ApifyClientAsync, - monkeypatch: pytest.MonkeyPatch, - ) -> None: - assert apify_client_async.token is not None - monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_client_async.token) - - key_value_store_name = generate_unique_resource_name('key_value_store') +async def test_same_references_in_default_kvs(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + kvs1 = await Actor.open_key_value_store() + kvs2 = await Actor.open_key_value_store() + assert kvs1 is kvs2 + + actor = await make_actor('kvs-same-ref-default', main_func=main) + + run_result = await actor.call() + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + +async def 
test_same_references_in_named_kvs(make_actor: ActorFactory) -> None: + kvs_name = generate_unique_resource_name('key-value-store') + + async def main() -> None: async with Actor: - key_value_store = await Actor.open_key_value_store(name=key_value_store_name, force_cloud=True) - key_value_store_id = key_value_store._id - - await key_value_store.set_value('foo', 'bar') - - key_value_store_client = apify_client_async.key_value_store(key_value_store_id) - - try: - key_value_store_details = await key_value_store_client.get() - assert key_value_store_details is not None - assert key_value_store_details.get('name') == key_value_store_name - - key_value_store_record = await key_value_store_client.get_record('foo') - assert key_value_store_record is not None - assert key_value_store_record['value'] == 'bar' - finally: - await key_value_store_client.delete() - - -class TestActorGetSetValue: - async def test_actor_get_set_value_simple(self: TestActorGetSetValue, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - await Actor.set_value('test', {'number': 123, 'string': 'a string', 'nested': {'test': 1}}) - value = await Actor.get_value('test') - assert value['number'] == 123 - assert value['string'] == 'a string' - assert value['nested']['test'] == 1 - - actor = await make_actor('actor-get-set-value', main_func=main) - - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - - async def test_actor_get_set_value_complex(self: TestActorGetSetValue, make_actor: ActorFactory) -> None: - async def main_set() -> None: - async with Actor: - await Actor.set_value('test', {'number': 123, 'string': 'a string', 'nested': {'test': 1}}) - - actor_set = await make_actor('actor-set-value', main_func=main_set) - - run_result_set = await actor_set.call() - assert run_result_set is not None - assert run_result_set['status'] == 'SUCCEEDED' - # Externally check if the value is present in key-value store - 
test_record = await actor_set.last_run().key_value_store().get_record('test') - assert test_record is not None - test_value = test_record['value'] - assert test_value['number'] == 123 - assert test_value['string'] == 'a string' - assert test_value['nested']['test'] == 1 - - async def main_get() -> None: - async with Actor: - input_object = await Actor.get_input() - # Access KVS of the previous 'set' run - kvs = await Actor.open_key_value_store(id=input_object['kvs-id']) - value = await kvs.get_value('test') - assert value['number'] == 123 - assert value['string'] == 'a string' - assert value['nested']['test'] == 1 - - actor_get = await make_actor('actor-get-value', main_func=main_get) - default_kvs_info = await actor_set.last_run().key_value_store().get() - assert default_kvs_info is not None - - run_result_get = await actor_get.call(run_input={'kvs-id': default_kvs_info['id']}) - assert run_result_get is not None - assert run_result_get['status'] == 'SUCCEEDED' - - -class TestActorGetInput: - async def test_actor_get_input(self: TestActorGetInput, make_actor: ActorFactory) -> None: - actor_source_files = { - 'INPUT_SCHEMA.json': """ - { - "title": "Actor get input test", - "type": "object", - "schemaVersion": 1, - "properties": { - "password": { - "title": "Password", - "type": "string", - "description": "A secret, encrypted input field", - "editor": "textfield", - "isSecret": true - } - }, - "required": ["password"] - } - """, - 'src/main.py': """ - import asyncio - from apify import Actor - - async def main(): - async with Actor: - input_object = await Actor.get_input() - assert input_object is not None - assert input_object['number'] == 123 - assert input_object['string'] == 'a string' - assert input_object['nested']['test'] == 1 - assert input_object['password'] == 'very secret' - """, - } - actor = await make_actor('actor-get-input', source_files=actor_source_files) - - run_result = await actor.call( - run_input={ - 'number': 123, - 'string': 'a string', - 
'nested': {'test': 1}, - 'password': 'very secret', + input_object = await Actor.get_input() + kvs_name = input_object['kvsName'] + kvs_by_name_1 = await Actor.open_key_value_store(name=kvs_name) + kvs_by_name_2 = await Actor.open_key_value_store(name=kvs_name) + assert kvs_by_name_1 is kvs_by_name_2 + + kvs_by_id_1 = await Actor.open_key_value_store(id=kvs_by_name_1._id) + kvs_by_id_2 = await Actor.open_key_value_store(id=kvs_by_name_1._id) + assert kvs_by_id_1 is kvs_by_name_1 + assert kvs_by_id_2 is kvs_by_id_1 + + await kvs_by_name_1.drop() + + actor = await make_actor('kvs-same-ref-named', main_func=main) + + run_result = await actor.call(run_input={'kvsName': kvs_name}) + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + + +async def test_force_cloud(apify_client_async: ApifyClientAsync, monkeypatch: pytest.MonkeyPatch) -> None: + assert apify_client_async.token is not None + monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_client_async.token) + + key_value_store_name = generate_unique_resource_name('key_value_store') + + async with Actor: + key_value_store = await Actor.open_key_value_store(name=key_value_store_name, force_cloud=True) + key_value_store_id = key_value_store._id + + await key_value_store.set_value('foo', 'bar') + + key_value_store_client = apify_client_async.key_value_store(key_value_store_id) + + try: + key_value_store_details = await key_value_store_client.get() + assert key_value_store_details is not None + assert key_value_store_details.get('name') == key_value_store_name + + key_value_store_record = await key_value_store_client.get_record('foo') + assert key_value_store_record is not None + assert key_value_store_record['value'] == 'bar' + finally: + await key_value_store_client.delete() + + +async def test_set_and_get_value_in_same_run(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + await Actor.set_value('test', {'number': 123, 'string': 'a string', 'nested': {'test': 1}}) + 
value = await Actor.get_value('test') + assert value['number'] == 123 + assert value['string'] == 'a string' + assert value['nested']['test'] == 1 + + actor = await make_actor('actor-get-set-value', main_func=main) + + run_result = await actor.call() + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + + +async def test_set_value_in_one_run_and_get_value_in_another(make_actor: ActorFactory) -> None: + async def main_set() -> None: + async with Actor: + await Actor.set_value('test', {'number': 123, 'string': 'a string', 'nested': {'test': 1}}) + + actor_set = await make_actor('actor-set-value', main_func=main_set) + + run_result_set = await actor_set.call() + assert run_result_set is not None + assert run_result_set['status'] == 'SUCCEEDED' + # Externally check if the value is present in key-value store + test_record = await actor_set.last_run().key_value_store().get_record('test') + assert test_record is not None + test_value = test_record['value'] + assert test_value['number'] == 123 + assert test_value['string'] == 'a string' + assert test_value['nested']['test'] == 1 + + async def main_get() -> None: + async with Actor: + input_object = await Actor.get_input() + # Access KVS of the previous 'set' run + kvs = await Actor.open_key_value_store(id=input_object['kvs-id']) + value = await kvs.get_value('test') + assert value['number'] == 123 + assert value['string'] == 'a string' + assert value['nested']['test'] == 1 + + actor_get = await make_actor('actor-get-value', main_func=main_get) + default_kvs_info = await actor_set.last_run().key_value_store().get() + assert default_kvs_info is not None + + run_result_get = await actor_get.call(run_input={'kvs-id': default_kvs_info['id']}) + assert run_result_get is not None + assert run_result_get['status'] == 'SUCCEEDED' + + +async def test_actor_get_input_from_run(make_actor: ActorFactory) -> None: + actor_source_files = { + 'INPUT_SCHEMA.json': """ + { + "title": "Actor get input test", + "type": 
"object", + "schemaVersion": 1, + "properties": { + "password": { + "title": "Password", + "type": "string", + "description": "A secret, encrypted input field", + "editor": "textfield", + "isSecret": true + } + }, + "required": ["password"] } - ) - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' + """, + 'src/main.py': """ + import asyncio + from apify import Actor + + async def main(): + async with Actor: + input_object = await Actor.get_input() + assert input_object is not None + assert input_object['number'] == 123 + assert input_object['string'] == 'a string' + assert input_object['nested']['test'] == 1 + assert input_object['password'] == 'very secret' + """, + } + actor = await make_actor('actor-get-input', source_files=actor_source_files) + + run_result = await actor.call( + run_input={ + 'number': 123, + 'string': 'a string', + 'nested': {'test': 1}, + 'password': 'very secret', + } + ) + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' -class TestGetPublicUrl: - async def test_get_public_url(self: TestGetPublicUrl, make_actor: ActorFactory) -> None: - async def main() -> None: - from typing import cast +async def test_generate_public_url_for_kvs_record(make_actor: ActorFactory) -> None: + async def main() -> None: + from typing import cast - from apify.apify_storage_client._key_value_store_client import KeyValueStoreClient + from apify.apify_storage_client._key_value_store_client import KeyValueStoreClient - async with Actor: - public_api_url = Actor.config.api_public_base_url - default_store_id = Actor.config.default_key_value_store_id + async with Actor: + public_api_url = Actor.config.api_public_base_url + default_store_id = Actor.config.default_key_value_store_id - store = await Actor.open_key_value_store() - record_url = await cast(KeyValueStoreClient, store._resource_client).get_public_url('dummy') - print(record_url) + store = await Actor.open_key_value_store() + record_url = await 
cast(KeyValueStoreClient, store._resource_client).get_public_url('dummy') + print(record_url) - assert record_url == f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/dummy' + assert record_url == f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/dummy' - actor = await make_actor('kvs-get-public-url', main_func=main) + actor = await make_actor('kvs-get-public-url', main_func=main) - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' + run_result = await actor.call() + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' diff --git a/tests/integration/test_actor_lifecycle.py b/tests/integration/test_actor_lifecycle.py index 84d1b22f..b2f756f9 100644 --- a/tests/integration/test_actor_lifecycle.py +++ b/tests/integration/test_actor_lifecycle.py @@ -8,103 +8,102 @@ from .conftest import ActorFactory -class TestActorInit: - async def test_actor_init(self: TestActorInit, make_actor: ActorFactory) -> None: - async def main() -> None: - my_actor = Actor +async def test_actor_init_and_double_init_prevention(make_actor: ActorFactory) -> None: + async def main() -> None: + my_actor = Actor + await my_actor.init() + assert my_actor._is_initialized is True + double_init = False + try: await my_actor.init() - assert my_actor._is_initialized is True - double_init = False - try: - await my_actor.init() - double_init = True - except RuntimeError as err: - assert str(err) == 'The Actor was already initialized!' # noqa: PT017 - except Exception: - raise - try: - await Actor.init() - double_init = True - except RuntimeError as err: - assert str(err) == 'The Actor was already initialized!' 
# noqa: PT017 - except Exception: - raise - await my_actor.exit() - assert double_init is False - assert my_actor._is_initialized is False - - actor = await make_actor('actor-init', main_func=main) - - run_result = await actor.call() + double_init = True + except RuntimeError as err: + assert str(err) == 'The Actor was already initialized!' # noqa: PT017 + except Exception: + raise + try: + await Actor.init() + double_init = True + except RuntimeError as err: + assert str(err) == 'The Actor was already initialized!' # noqa: PT017 + except Exception: + raise + await my_actor.exit() + assert double_init is False + assert my_actor._is_initialized is False - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - - async def test_async_with_actor_properly_initialize(self: TestActorInit, make_actor: ActorFactory) -> None: - async def main() -> None: - import apify._actor + actor = await make_actor('actor-init', main_func=main) - async with Actor: - assert apify._actor.Actor._is_initialized - assert apify._actor.Actor._is_initialized is False + run_result = await actor.call() - actor = await make_actor('with-actor-init', main_func=main) + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' +async def test_actor_init_correctly_in_async_with_block(make_actor: ActorFactory) -> None: + async def main() -> None: + import apify._actor + async with Actor: + assert apify._actor.Actor._is_initialized + assert apify._actor.Actor._is_initialized is False -class TestActorExit: - async def test_actor_exit_code(self: TestActorExit, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - input = await Actor.get_input() # noqa: A001 - await Actor.exit(**input) + actor = await make_actor('with-actor-init', main_func=main) - actor = await make_actor('actor-exit', main_func=main) + run_result = await 
actor.call() - for exit_code in [0, 1, 101]: - run_result = await actor.call(run_input={'exit_code': exit_code}) - assert run_result is not None - assert run_result['exitCode'] == exit_code - assert run_result['status'] == 'FAILED' if exit_code > 0 else 'SUCCEEDED' + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' -class TestActorFail: - async def test_fail_exit_code(self: TestActorFail, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - input = await Actor.get_input() # noqa: A001 - await Actor.fail(**input) if input else await Actor.fail() +async def test_actor_exit_with_different_exit_codes(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + input = await Actor.get_input() # noqa: A001 + await Actor.exit(**input) - actor = await make_actor('actor-fail', main_func=main) + actor = await make_actor('actor-exit', main_func=main) - run_result = await actor.call() + for exit_code in [0, 1, 101]: + run_result = await actor.call(run_input={'exit_code': exit_code}) assert run_result is not None - assert run_result['exitCode'] == 1 - assert run_result['status'] == 'FAILED' + assert run_result['exitCode'] == exit_code + assert run_result['status'] == 'FAILED' if exit_code > 0 else 'SUCCEEDED' - for exit_code in [1, 10, 100]: - run_result = await actor.call(run_input={'exit_code': exit_code}) - assert run_result is not None - assert run_result['exitCode'] == exit_code - assert run_result['status'] == 'FAILED' - # fail with status message - run_result = await actor.call(run_input={'status_message': 'This is a test message'}) - assert run_result is not None - assert run_result['status'] == 'FAILED' - assert run_result.get('statusMessage') == 'This is a test message' +async def test_actor_fail_with_custom_exit_codes_and_status_messages(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + input = await Actor.get_input() # noqa: A001 + await 
Actor.fail(**input) if input else await Actor.fail() - async def test_with_actor_fail_correctly(self: TestActorFail, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - raise Exception('This is a test exception') # noqa: TRY002 + actor = await make_actor('actor-fail', main_func=main) - actor = await make_actor('with-actor-fail', main_func=main) - run_result = await actor.call() + run_result = await actor.call() + assert run_result is not None + assert run_result['exitCode'] == 1 + assert run_result['status'] == 'FAILED' + + for exit_code in [1, 10, 100]: + run_result = await actor.call(run_input={'exit_code': exit_code}) assert run_result is not None - assert run_result['exitCode'] == 91 + assert run_result['exitCode'] == exit_code assert run_result['status'] == 'FAILED' + + # fail with status message + run_result = await actor.call(run_input={'status_message': 'This is a test message'}) + assert run_result is not None + assert run_result['status'] == 'FAILED' + assert run_result.get('statusMessage') == 'This is a test message' + + +async def test_actor_fails_correctly_with_exception(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + raise Exception('This is a test exception') # noqa: TRY002 + + actor = await make_actor('with-actor-fail', main_func=main) + run_result = await actor.call() + assert run_result is not None + assert run_result['exitCode'] == 91 + assert run_result['status'] == 'FAILED' diff --git a/tests/integration/test_actor_log.py b/tests/integration/test_actor_log.py index 117dba37..103e4794 100644 --- a/tests/integration/test_actor_log.py +++ b/tests/integration/test_actor_log.py @@ -8,80 +8,79 @@ from .conftest import ActorFactory -class TestActorLog: - async def test_actor_log(self: TestActorLog, make_actor: ActorFactory) -> None: - async def main() -> None: - import logging - - from apify.log import logger - - async with Actor: - logger.setLevel(logging.DEBUG) - - # Test 
Actor.log - Actor.log.debug('Debug message') - Actor.log.info('Info message') - - # Test logger - logger.warning('Warning message') - logger.error('Error message') - - # Test that exception is logged with the traceback - try: - raise ValueError('Dummy ValueError') - except Exception: - Actor.log.exception('Exception message') - - # Test multiline message being indented correctly - logger.info('Multi\nline\nlog\nmessage') - - # Test that exception in Actor.main is logged with the traceback - raise RuntimeError('Dummy RuntimeError') - - actor = await make_actor('actor-log', main_func=main) - - run_result = await actor.call() - - assert run_result is not None - assert run_result['status'] == 'FAILED' - - run_log = await actor.last_run().log().get() - assert run_log is not None - - run_log_lines = run_log.splitlines() - - # This should prevent issues when the test run is migrated, and it would have its log restarted - expected_log_lines_count = 24 - assert len(run_log_lines) >= expected_log_lines_count - run_log_lines = run_log_lines[-expected_log_lines_count:] - - # This removes the datetime from the start of log lines - run_log_lines = [line[25:] for line in run_log_lines] - - # This might be way too specific and easy to break, but let's hope not - assert run_log_lines.pop(0).startswith('ACTOR: Pulling Docker image') - assert run_log_lines.pop(0) == 'ACTOR: Creating Docker container.' - assert run_log_lines.pop(0) == 'ACTOR: Starting Docker container.' - assert run_log_lines.pop(0) == '[apify] INFO Initializing Actor...' 
- assert run_log_lines.pop(0).startswith( - f'[apify] INFO System info ({{"apify_sdk_version": "{__version__}", "apify_client_version": "' - ) - assert run_log_lines.pop(0) == '[apify] DEBUG Debug message' - assert run_log_lines.pop(0) == '[apify] INFO Info message' - assert run_log_lines.pop(0) == '[apify] WARN Warning message' - assert run_log_lines.pop(0) == '[apify] ERROR Error message' - assert run_log_lines.pop(0) == '[apify] ERROR Exception message' - assert run_log_lines.pop(0) == ' Traceback (most recent call last):' - assert run_log_lines.pop(0) == ' File "/usr/src/app/src/main.py", line 25, in main' - assert run_log_lines.pop(0) == " raise ValueError('Dummy ValueError')" - assert run_log_lines.pop(0) == ' ValueError: Dummy ValueError' - assert run_log_lines.pop(0) == '[apify] INFO Multi' - assert run_log_lines.pop(0) == 'line' - assert run_log_lines.pop(0) == 'log' - assert run_log_lines.pop(0) == 'message' - assert run_log_lines.pop(0) == '[apify] ERROR Actor failed with an exception' - assert run_log_lines.pop(0) == ' Traceback (most recent call last):' - assert run_log_lines.pop(0) == ' File "/usr/src/app/src/main.py", line 33, in main' - assert run_log_lines.pop(0) == " raise RuntimeError('Dummy RuntimeError')" - assert run_log_lines.pop(0) == ' RuntimeError: Dummy RuntimeError' - assert run_log_lines.pop(0) == '[apify] INFO Exiting Actor ({"exit_code": 91})' +async def test_actor_logging(make_actor: ActorFactory) -> None: + async def main() -> None: + import logging + + from apify.log import logger + + async with Actor: + logger.setLevel(logging.DEBUG) + + # Test Actor.log + Actor.log.debug('Debug message') + Actor.log.info('Info message') + + # Test logger + logger.warning('Warning message') + logger.error('Error message') + + # Test that exception is logged with the traceback + try: + raise ValueError('Dummy ValueError') + except Exception: + Actor.log.exception('Exception message') + + # Test multiline message being indented correctly + 
logger.info('Multi\nline\nlog\nmessage') + + # Test that exception in Actor.main is logged with the traceback + raise RuntimeError('Dummy RuntimeError') + + actor = await make_actor('actor-log', main_func=main) + + run_result = await actor.call() + + assert run_result is not None + assert run_result['status'] == 'FAILED' + + run_log = await actor.last_run().log().get() + assert run_log is not None + + run_log_lines = run_log.splitlines() + + # This should prevent issues when the test run is migrated, and it would have its log restarted + expected_log_lines_count = 24 + assert len(run_log_lines) >= expected_log_lines_count + run_log_lines = run_log_lines[-expected_log_lines_count:] + + # This removes the datetime from the start of log lines + run_log_lines = [line[25:] for line in run_log_lines] + + # This might be way too specific and easy to break, but let's hope not + assert run_log_lines.pop(0).startswith('ACTOR: Pulling Docker image') + assert run_log_lines.pop(0) == 'ACTOR: Creating Docker container.' + assert run_log_lines.pop(0) == 'ACTOR: Starting Docker container.' + assert run_log_lines.pop(0) == '[apify] INFO Initializing Actor...' 
+ assert run_log_lines.pop(0).startswith( + f'[apify] INFO System info ({{"apify_sdk_version": "{__version__}", "apify_client_version": "' + ) + assert run_log_lines.pop(0) == '[apify] DEBUG Debug message' + assert run_log_lines.pop(0) == '[apify] INFO Info message' + assert run_log_lines.pop(0) == '[apify] WARN Warning message' + assert run_log_lines.pop(0) == '[apify] ERROR Error message' + assert run_log_lines.pop(0) == '[apify] ERROR Exception message' + assert run_log_lines.pop(0) == ' Traceback (most recent call last):' + assert run_log_lines.pop(0) == ' File "/usr/src/app/src/main.py", line 25, in main' + assert run_log_lines.pop(0) == " raise ValueError('Dummy ValueError')" + assert run_log_lines.pop(0) == ' ValueError: Dummy ValueError' + assert run_log_lines.pop(0) == '[apify] INFO Multi' + assert run_log_lines.pop(0) == 'line' + assert run_log_lines.pop(0) == 'log' + assert run_log_lines.pop(0) == 'message' + assert run_log_lines.pop(0) == '[apify] ERROR Actor failed with an exception' + assert run_log_lines.pop(0) == ' Traceback (most recent call last):' + assert run_log_lines.pop(0) == ' File "/usr/src/app/src/main.py", line 33, in main' + assert run_log_lines.pop(0) == " raise RuntimeError('Dummy RuntimeError')" + assert run_log_lines.pop(0) == ' RuntimeError: Dummy RuntimeError' + assert run_log_lines.pop(0) == '[apify] INFO Exiting Actor ({"exit_code": 91})' diff --git a/tests/integration/test_actor_request_queue.py b/tests/integration/test_actor_request_queue.py index adc5784a..fc1902a3 100644 --- a/tests/integration/test_actor_request_queue.py +++ b/tests/integration/test_actor_request_queue.py @@ -16,69 +16,66 @@ from .conftest import ActorFactory -class TestActorOpenRequestQueue: - async def test_same_references_default(self: TestActorOpenRequestQueue, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - rq1 = await Actor.open_request_queue() - rq2 = await Actor.open_request_queue() - assert rq1 is rq2 - - actor = 
await make_actor('rq-same-ref-default', main_func=main) - - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - - async def test_same_references_named(self: TestActorOpenRequestQueue, make_actor: ActorFactory) -> None: - rq_name = generate_unique_resource_name('request-queue') - - async def main() -> None: - async with Actor: - input_object = await Actor.get_input() - rq_name = input_object['rqName'] - rq_by_name_1 = await Actor.open_request_queue(name=rq_name) - rq_by_name_2 = await Actor.open_request_queue(name=rq_name) - assert rq_by_name_1 is rq_by_name_2 - - rq_by_id_1 = await Actor.open_request_queue(id=rq_by_name_1._id) - rq_by_id_2 = await Actor.open_request_queue(id=rq_by_name_1._id) - assert rq_by_id_1 is rq_by_name_1 - assert rq_by_id_2 is rq_by_id_1 - - await rq_by_name_1.drop() - - actor = await make_actor('rq-same-ref-named', main_func=main) - - run_result = await actor.call(run_input={'rqName': rq_name}) - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - - async def test_force_cloud( - self: TestActorOpenRequestQueue, - apify_client_async: ApifyClientAsync, - monkeypatch: pytest.MonkeyPatch, - ) -> None: - assert apify_client_async.token is not None - monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_client_async.token) - - request_queue_name = generate_unique_resource_name('request_queue') +async def test_same_references_in_default_rq(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + rq1 = await Actor.open_request_queue() + rq2 = await Actor.open_request_queue() + assert rq1 is rq2 + + actor = await make_actor('rq-same-ref-default', main_func=main) + + run_result = await actor.call() + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + +async def test_same_references_in_named_rq(make_actor: ActorFactory) -> None: + rq_name = generate_unique_resource_name('request-queue') + + async def main() -> None: async 
with Actor: - request_queue = await Actor.open_request_queue(name=request_queue_name, force_cloud=True) - request_queue_id = request_queue._id + input_object = await Actor.get_input() + rq_name = input_object['rqName'] + rq_by_name_1 = await Actor.open_request_queue(name=rq_name) + rq_by_name_2 = await Actor.open_request_queue(name=rq_name) + assert rq_by_name_1 is rq_by_name_2 + + rq_by_id_1 = await Actor.open_request_queue(id=rq_by_name_1._id) + rq_by_id_2 = await Actor.open_request_queue(id=rq_by_name_1._id) + assert rq_by_id_1 is rq_by_name_1 + assert rq_by_id_2 is rq_by_id_1 + + await rq_by_name_1.drop() + + actor = await make_actor('rq-same-ref-named', main_func=main) + + run_result = await actor.call(run_input={'rqName': rq_name}) + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + + +async def test_force_cloud(apify_client_async: ApifyClientAsync, monkeypatch: pytest.MonkeyPatch) -> None: + assert apify_client_async.token is not None + monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_client_async.token) + + request_queue_name = generate_unique_resource_name('request_queue') + + async with Actor: + request_queue = await Actor.open_request_queue(name=request_queue_name, force_cloud=True) + request_queue_id = request_queue._id - request_info = await request_queue.add_request(Request.from_url('http://example.com')) + request_info = await request_queue.add_request(Request.from_url('http://example.com')) - request_queue_client = apify_client_async.request_queue(request_queue_id) + request_queue_client = apify_client_async.request_queue(request_queue_id) - try: - request_queue_details = await request_queue_client.get() - assert request_queue_details is not None - assert request_queue_details.get('name') == request_queue_name + try: + request_queue_details = await request_queue_client.get() + assert request_queue_details is not None + assert request_queue_details.get('name') == request_queue_name - request_queue_request = await 
request_queue_client.get_request(request_info.id) - assert request_queue_request is not None - assert request_queue_request['url'] == 'http://example.com' - finally: - await request_queue_client.delete() + request_queue_request = await request_queue_client.get_request(request_info.id) + assert request_queue_request is not None + assert request_queue_request['url'] == 'http://example.com' + finally: + await request_queue_client.delete() diff --git a/tests/integration/test_fixtures.py b/tests/integration/test_fixtures.py index 93ff5588..049a3d73 100644 --- a/tests/integration/test_fixtures.py +++ b/tests/integration/test_fixtures.py @@ -13,84 +13,81 @@ from .conftest import ActorFactory -class TestMakeActorFixture: - async def test_main_func(self: TestMakeActorFixture, make_actor: ActorFactory) -> None: - async def main() -> None: - import os +async def test_actor_from_main_func(make_actor: ActorFactory) -> None: + async def main() -> None: + import os - from apify_shared.consts import ActorEnvVars + from apify_shared.consts import ActorEnvVars + async with Actor: + await Actor.set_value('OUTPUT', os.getenv(ActorEnvVars.ID)) + + actor = await make_actor('make-actor-main-func', main_func=main) + + run_result = await actor.call() + + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + + output_record = await actor.last_run().key_value_store().get_record('OUTPUT') + assert output_record is not None + assert run_result['actId'] == output_record['value'] + + +async def test_actor_from_main_py(make_actor: ActorFactory) -> None: + expected_output = f'ACTOR_OUTPUT_{crypto_random_object_id(5)}' + main_py_source = f""" + import asyncio + from apify import Actor + async def main(): async with Actor: - await Actor.set_value('OUTPUT', os.getenv(ActorEnvVars.ID)) + await Actor.set_value('OUTPUT', '{expected_output}') + """ + + actor = await make_actor('make-actor-main-py', main_py=main_py_source) - actor = await make_actor('make-actor-main-func', 
main_func=main) + run_result = await actor.call() + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' - run_result = await actor.call() + output_record = await actor.last_run().key_value_store().get_record('OUTPUT') + assert output_record is not None + assert output_record['value'] == expected_output - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - output_record = await actor.last_run().key_value_store().get_record('OUTPUT') - assert output_record is not None - assert run_result['actId'] == output_record['value'] +async def test_actor_from_source_files(make_actor: ActorFactory) -> None: + test_started_at = datetime.now(timezone.utc) + actor_source_files = { + 'src/utils.py': """ + from datetime import datetime, timezone - async def test_main_py(self: TestMakeActorFixture, make_actor: ActorFactory) -> None: - expected_output = f'ACTOR_OUTPUT_{crypto_random_object_id(5)}' - main_py_source = f""" + def get_current_datetime(): + return datetime.now(timezone.utc) + """, + 'src/main.py': """ import asyncio from apify import Actor + from .utils import get_current_datetime + async def main(): async with Actor: - await Actor.set_value('OUTPUT', '{expected_output}') - """ - - actor = await make_actor('make-actor-main-py', main_py=main_py_source) - - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - - output_record = await actor.last_run().key_value_store().get_record('OUTPUT') - assert output_record is not None - assert output_record['value'] == expected_output - - async def test_source_files(self: TestMakeActorFixture, make_actor: ActorFactory) -> None: - test_started_at = datetime.now(timezone.utc) - actor_source_files = { - 'src/utils.py': """ - from datetime import datetime, timezone - - def get_current_datetime(): - return datetime.now(timezone.utc) - """, - 'src/main.py': """ - import asyncio - from apify import Actor - from .utils import 
get_current_datetime - - async def main(): - async with Actor: - current_datetime = get_current_datetime() - await Actor.set_value('OUTPUT', current_datetime) - """, - } - actor = await make_actor('make-actor-source-files', source_files=actor_source_files) - - actor_run = await actor.call() - assert actor_run is not None - assert actor_run['status'] == 'SUCCEEDED' - - output_record = await actor.last_run().key_value_store().get_record('OUTPUT') - assert output_record is not None - - output_datetime = datetime.fromisoformat(output_record['value']) - assert output_datetime > test_started_at - assert output_datetime < datetime.now(timezone.utc) - - -class TestApifyClientAsyncFixture: - async def test_apify_client_async_works( - self: TestApifyClientAsyncFixture, - apify_client_async: ApifyClientAsync, - ) -> None: - assert await apify_client_async.user('me').get() is not None + current_datetime = get_current_datetime() + await Actor.set_value('OUTPUT', current_datetime) + """, + } + actor = await make_actor('make-actor-source-files', source_files=actor_source_files) + + actor_run = await actor.call() + assert actor_run is not None + assert actor_run['status'] == 'SUCCEEDED' + + output_record = await actor.last_run().key_value_store().get_record('OUTPUT') + assert output_record is not None + + output_datetime = datetime.fromisoformat(output_record['value']) + assert output_datetime > test_started_at + assert output_datetime < datetime.now(timezone.utc) + + +async def test_apify_client_async_works(apify_client_async: ApifyClientAsync) -> None: + assert await apify_client_async.user('me').get() is not None diff --git a/tests/integration/test_request_queue.py b/tests/integration/test_request_queue.py index 2ec06914..19251ad0 100644 --- a/tests/integration/test_request_queue.py +++ b/tests/integration/test_request_queue.py @@ -8,99 +8,100 @@ from .conftest import ActorFactory -class TestRequestQueue: - async def test_simple(self: TestRequestQueue, make_actor: ActorFactory) 
-> None: - async def main() -> None: - async with Actor: - desired_request_count = 100 - print('Opening request queue...') - # I have seen it get stuck on this call - rq = await Actor.open_request_queue() - # Add some requests - for i in range(desired_request_count): - print(f'Adding request {i}...') - await rq.add_request(f'https://example.com/{i}') - - handled_request_count = 0 - while next_request := await rq.fetch_next_request(): - print('Fetching next request...') - queue_operation_info = await rq.mark_request_as_handled(next_request) - assert queue_operation_info is not None - assert queue_operation_info.was_already_handled is False - handled_request_count += 1 - - assert handled_request_count == desired_request_count - print('Waiting for queue to be finished...') - is_finished = await rq.is_finished() - assert is_finished is True - - actor = await make_actor('rq-simple-test', main_func=main) - - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - - async def test_batch(self, make_actor: ActorFactory) -> None: - async def main() -> None: - async with Actor: - desired_request_count = 100 - print('Opening request queue...') - # I have seen it get stuck on this call - rq = await Actor.open_request_queue() - # Add some requests - await rq.add_requests_batched([f'https://example.com/{i}' for i in range(desired_request_count)]) - - handled_request_count = 0 - while next_request := await rq.fetch_next_request(): - print('Fetching next request...') - queue_operation_info = await rq.mark_request_as_handled(next_request) - assert queue_operation_info is not None - assert queue_operation_info.was_already_handled is False - handled_request_count += 1 - - assert handled_request_count == desired_request_count - print('Waiting for queue to be finished...') - is_finished = await rq.is_finished() - assert is_finished is True - - actor = await make_actor('rq-batch-test', main_func=main) - - run_result = await 
actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' - - async def test_batch_non_unique(self, make_actor: ActorFactory) -> None: - async def main() -> None: - from crawlee import Request - - async with Actor: - desired_request_count = 100 - print('Opening request queue...') - # I have seen it get stuck on this call - rq = await Actor.open_request_queue() - # Add some requests - await rq.add_requests_batched( - [ - Request.from_url(f'https://example.com/{i}', unique_key=str(i - 1 if i % 4 == 1 else i)) - for i in range(desired_request_count) - ] - ) - - handled_request_count = 0 - while next_request := await rq.fetch_next_request(): - print('Fetching next request...') - queue_operation_info = await rq.mark_request_as_handled(next_request) - assert queue_operation_info is not None - assert queue_operation_info.was_already_handled is False - handled_request_count += 1 - - assert handled_request_count == desired_request_count * 3 / 4 - print('Waiting for queue to be finished...') - is_finished = await rq.is_finished() - assert is_finished is True - - actor = await make_actor('rq-batch-test', main_func=main) - - run_result = await actor.call() - assert run_result is not None - assert run_result['status'] == 'SUCCEEDED' +async def test_add_and_fetch_requests(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + desired_request_count = 100 + print('Opening request queue...') + # I have seen it get stuck on this call + rq = await Actor.open_request_queue() + # Add some requests + for i in range(desired_request_count): + print(f'Adding request {i}...') + await rq.add_request(f'https://example.com/{i}') + + handled_request_count = 0 + while next_request := await rq.fetch_next_request(): + print('Fetching next request...') + queue_operation_info = await rq.mark_request_as_handled(next_request) + assert queue_operation_info is not None + assert queue_operation_info.was_already_handled is False + 
handled_request_count += 1 + + assert handled_request_count == desired_request_count + print('Waiting for queue to be finished...') + is_finished = await rq.is_finished() + assert is_finished is True + + actor = await make_actor('rq-simple-test', main_func=main) + + run_result = await actor.call() + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + + +async def test_add_requests_in_batches(make_actor: ActorFactory) -> None: + async def main() -> None: + async with Actor: + desired_request_count = 100 + print('Opening request queue...') + # I have seen it get stuck on this call + rq = await Actor.open_request_queue() + # Add some requests + await rq.add_requests_batched([f'https://example.com/{i}' for i in range(desired_request_count)]) + + handled_request_count = 0 + while next_request := await rq.fetch_next_request(): + print('Fetching next request...') + queue_operation_info = await rq.mark_request_as_handled(next_request) + assert queue_operation_info is not None + assert queue_operation_info.was_already_handled is False + handled_request_count += 1 + + assert handled_request_count == desired_request_count + print('Waiting for queue to be finished...') + is_finished = await rq.is_finished() + assert is_finished is True + + actor = await make_actor('rq-batch-test', main_func=main) + + run_result = await actor.call() + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' + + +async def test_add_non_unique_requests_in_batch(make_actor: ActorFactory) -> None: + async def main() -> None: + from crawlee import Request + + async with Actor: + desired_request_count = 100 + print('Opening request queue...') + # I have seen it get stuck on this call + rq = await Actor.open_request_queue() + # Add some requests + await rq.add_requests_batched( + [ + Request.from_url(f'https://example.com/{i}', unique_key=str(i - 1 if i % 4 == 1 else i)) + for i in range(desired_request_count) + ] + ) + + handled_request_count = 0 + while 
next_request := await rq.fetch_next_request(): + print('Fetching next request...') + queue_operation_info = await rq.mark_request_as_handled(next_request) + assert queue_operation_info is not None + assert queue_operation_info.was_already_handled is False + handled_request_count += 1 + + assert handled_request_count == desired_request_count * 3 / 4 + print('Waiting for queue to be finished...') + is_finished = await rq.is_finished() + assert is_finished is True + + actor = await make_actor('rq-batch-test', main_func=main) + + run_result = await actor.call() + assert run_result is not None + assert run_result['status'] == 'SUCCEEDED' diff --git a/tests/unit/actor/test_actor_create_proxy_configuration.py b/tests/unit/actor/test_actor_create_proxy_configuration.py index 5111b452..e0c7cd57 100644 --- a/tests/unit/actor/test_actor_create_proxy_configuration.py +++ b/tests/unit/actor/test_actor_create_proxy_configuration.py @@ -24,122 +24,120 @@ def patched_apify_client(apify_client_async_patcher: ApifyClientAsyncPatcher) -> return ApifyClientAsync() -class TestActorCreateProxyConfiguration: - async def test_create_proxy_configuration_basic( - self: TestActorCreateProxyConfiguration, - monkeypatch: pytest.MonkeyPatch, - respx_mock: MockRouter, - patched_apify_client: ApifyClientAsync, - ) -> None: - dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' - monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN') - monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) - - route = respx_mock.get(dummy_proxy_status_url) - route.mock( - httpx.Response( - 200, - json={ - 'connected': True, - 'connectionError': None, - 'isManInTheMiddle': True, - }, - ) +async def test_basic_proxy_configuration_creation( + monkeypatch: pytest.MonkeyPatch, + respx_mock: MockRouter, + patched_apify_client: ApifyClientAsync, +) -> None: + dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' + monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN') + 
monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) + + route = respx_mock.get(dummy_proxy_status_url) + route.mock( + httpx.Response( + 200, + json={ + 'connected': True, + 'connectionError': None, + 'isManInTheMiddle': True, + }, ) + ) - groups = ['GROUP1', 'GROUP2'] - country_code = 'US' - - await Actor.init() - - proxy_configuration = await Actor.create_proxy_configuration(groups=groups, country_code=country_code) - - assert proxy_configuration is not None - assert proxy_configuration._groups == groups - assert proxy_configuration._password == DUMMY_PASSWORD - assert proxy_configuration._country_code == country_code - - assert len(patched_apify_client.calls['user']['get']) == 1 # type: ignore - assert len(route.calls) == 1 - - await Actor.exit() - - async def test_create_proxy_configuration_actor_proxy_input( - self: TestActorCreateProxyConfiguration, - monkeypatch: pytest.MonkeyPatch, - respx_mock: MockRouter, - patched_apify_client: ApifyClientAsync, - ) -> None: - dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' - dummy_proxy_url = 'http://dummy-proxy.com:8000' - - monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN') - monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) - - route = respx_mock.get(dummy_proxy_status_url) - route.mock( - httpx.Response( - 200, - json={ - 'connected': True, - 'connectionError': None, - 'isManInTheMiddle': True, - }, - ) - ) + groups = ['GROUP1', 'GROUP2'] + country_code = 'US' - await Actor.init() + await Actor.init() - proxy_configuration = await Actor.create_proxy_configuration(actor_proxy_input={}) - assert proxy_configuration is None + proxy_configuration = await Actor.create_proxy_configuration(groups=groups, country_code=country_code) - proxy_configuration = await Actor.create_proxy_configuration( - actor_proxy_input={ - 'useApifyProxy': False, - } - ) - assert proxy_configuration is None + assert proxy_configuration is not None + assert 
proxy_configuration._groups == groups + assert proxy_configuration._password == DUMMY_PASSWORD + assert proxy_configuration._country_code == country_code - proxy_configuration = await Actor.create_proxy_configuration( - actor_proxy_input={ - 'proxyUrls': [], - } - ) - assert proxy_configuration is None + assert len(patched_apify_client.calls['user']['get']) == 1 # type: ignore + assert len(route.calls) == 1 - proxy_configuration = await Actor.create_proxy_configuration( - actor_proxy_input={ - 'useApifyProxy': False, - 'proxyUrls': [dummy_proxy_url], - } - ) - assert proxy_configuration is not None - assert await proxy_configuration.new_url() == dummy_proxy_url + await Actor.exit() - proxy_configuration = await Actor.create_proxy_configuration( - actor_proxy_input={ - 'useApifyProxy': True, - } - ) - assert proxy_configuration is not None - assert await proxy_configuration.new_url() == f'http://auto:{DUMMY_PASSWORD}@proxy.apify.com:8000' - - groups = ['GROUP1', 'GROUP2'] - country_code = 'US' - proxy_configuration = await Actor.create_proxy_configuration( - actor_proxy_input={ - 'useApifyProxy': True, - 'apifyProxyGroups': groups, - 'apifyProxyCountry': country_code, - } - ) - assert proxy_configuration is not None - assert ( - await proxy_configuration.new_url() - == f'http://groups-{"+".join(groups)},country-{country_code}:{DUMMY_PASSWORD}@proxy.apify.com:8000' - ) - assert len(patched_apify_client.calls['user']['get']) == 2 # type: ignore - assert len(route.calls) == 2 +async def test_proxy_configuration_with_actor_proxy_input( + monkeypatch: pytest.MonkeyPatch, + respx_mock: MockRouter, + patched_apify_client: ApifyClientAsync, +) -> None: + dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' + dummy_proxy_url = 'http://dummy-proxy.com:8000' - await Actor.exit() + monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN') + monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) + + route = 
respx_mock.get(dummy_proxy_status_url) + route.mock( + httpx.Response( + 200, + json={ + 'connected': True, + 'connectionError': None, + 'isManInTheMiddle': True, + }, + ) + ) + + await Actor.init() + + proxy_configuration = await Actor.create_proxy_configuration(actor_proxy_input={}) + assert proxy_configuration is None + + proxy_configuration = await Actor.create_proxy_configuration( + actor_proxy_input={ + 'useApifyProxy': False, + } + ) + assert proxy_configuration is None + + proxy_configuration = await Actor.create_proxy_configuration( + actor_proxy_input={ + 'proxyUrls': [], + } + ) + assert proxy_configuration is None + + proxy_configuration = await Actor.create_proxy_configuration( + actor_proxy_input={ + 'useApifyProxy': False, + 'proxyUrls': [dummy_proxy_url], + } + ) + assert proxy_configuration is not None + assert await proxy_configuration.new_url() == dummy_proxy_url + + proxy_configuration = await Actor.create_proxy_configuration( + actor_proxy_input={ + 'useApifyProxy': True, + } + ) + assert proxy_configuration is not None + assert await proxy_configuration.new_url() == f'http://auto:{DUMMY_PASSWORD}@proxy.apify.com:8000' + + groups = ['GROUP1', 'GROUP2'] + country_code = 'US' + proxy_configuration = await Actor.create_proxy_configuration( + actor_proxy_input={ + 'useApifyProxy': True, + 'apifyProxyGroups': groups, + 'apifyProxyCountry': country_code, + } + ) + assert proxy_configuration is not None + assert ( + await proxy_configuration.new_url() + == f'http://groups-{"+".join(groups)},country-{country_code}:{DUMMY_PASSWORD}@proxy.apify.com:8000' + ) + + assert len(patched_apify_client.calls['user']['get']) == 2 # type: ignore + assert len(route.calls) == 2 + + await Actor.exit() diff --git a/tests/unit/actor/test_actor_dataset.py b/tests/unit/actor/test_actor_dataset.py index 10400069..d666cc20 100644 --- a/tests/unit/actor/test_actor_dataset.py +++ b/tests/unit/actor/test_actor_dataset.py @@ -15,51 +15,50 @@ # Actual tests for the 
implementations are in storages/. -class TestActorOpenDataset: - async def test_throws_without_init(self: TestActorOpenDataset) -> None: - with pytest.raises(RuntimeError): - await Actor.open_dataset() - - async def test_same_references(self: TestActorOpenDataset) -> None: - async with Actor: - dataset1 = await Actor.open_dataset() - dataset2 = await Actor.open_dataset() - assert dataset1 is dataset2 - - dataset_name = 'non-default' - dataset_by_name_1 = await Actor.open_dataset(name=dataset_name) - dataset_by_name_2 = await Actor.open_dataset(name=dataset_name) - assert dataset_by_name_1 is dataset_by_name_2 - - dataset_by_id_1 = await Actor.open_dataset(id=dataset_by_name_1._id) - dataset_by_id_2 = await Actor.open_dataset(id=dataset_by_name_1._id) - - assert dataset_by_id_1 is dataset_by_name_1 - assert dataset_by_id_2 is dataset_by_id_1 - - async def test_open_datatset_based_env_var( - self: TestActorOpenDataset, - monkeypatch: pytest.MonkeyPatch, - memory_storage_client: MemoryStorageClient, - ) -> None: - default_dataset_id = 'my-new-default-id' - monkeypatch.setenv(ActorEnvVars.DEFAULT_DATASET_ID, default_dataset_id) - - async with Actor: - ddt = await Actor.open_dataset() - assert ddt._id == default_dataset_id - await memory_storage_client.dataset(ddt._id).delete() - - -class TestActorPushData: - async def test_push_data(self: TestActorPushData) -> None: - async with Actor as my_actor: - dataset = await my_actor.open_dataset() - desired_item_count = 100 - await dataset.push_data([{'id': i} for i in range(desired_item_count)]) - - dataset_info = await dataset.get_info() - assert dataset_info is not None - - list_page = await dataset.get_data(limit=desired_item_count) - assert {item['id'] for item in list_page.items} == set(range(desired_item_count)) +async def test_throws_error_without_actor_init() -> None: + with pytest.raises(RuntimeError): + await Actor.open_dataset() + + +async def test_open_dataset_returns_same_references() -> None: + async with Actor: 
+ dataset1 = await Actor.open_dataset() + dataset2 = await Actor.open_dataset() + assert dataset1 is dataset2 + + dataset_name = 'non-default' + dataset_by_name_1 = await Actor.open_dataset(name=dataset_name) + dataset_by_name_2 = await Actor.open_dataset(name=dataset_name) + assert dataset_by_name_1 is dataset_by_name_2 + + dataset_by_id_1 = await Actor.open_dataset(id=dataset_by_name_1._id) + dataset_by_id_2 = await Actor.open_dataset(id=dataset_by_name_1._id) + + assert dataset_by_id_1 is dataset_by_name_1 + assert dataset_by_id_2 is dataset_by_id_1 + + +async def test_open_dataset_uses_env_var( + monkeypatch: pytest.MonkeyPatch, + memory_storage_client: MemoryStorageClient, +) -> None: + default_dataset_id = 'my-new-default-id' + monkeypatch.setenv(ActorEnvVars.DEFAULT_DATASET_ID, default_dataset_id) + + async with Actor: + ddt = await Actor.open_dataset() + assert ddt._id == default_dataset_id + await memory_storage_client.dataset(ddt._id).delete() + + +async def test_push_data_to_dataset() -> None: + async with Actor as my_actor: + dataset = await my_actor.open_dataset() + desired_item_count = 100 + await dataset.push_data([{'id': i} for i in range(desired_item_count)]) + + dataset_info = await dataset.get_info() + assert dataset_info is not None + + list_page = await dataset.get_data(limit=desired_item_count) + assert {item['id'] for item in list_page.items} == set(range(desired_item_count)) diff --git a/tests/unit/actor/test_actor_env_helpers.py b/tests/unit/actor/test_actor_env_helpers.py index 94b83a7a..1b5dd235 100644 --- a/tests/unit/actor/test_actor_env_helpers.py +++ b/tests/unit/actor/test_actor_env_helpers.py @@ -23,13 +23,13 @@ import pytest -async def test_is_at_home_local() -> None: +async def test_actor_is_not_at_home_when_local() -> None: async with Actor as actor: is_at_home = actor.is_at_home() assert is_at_home is False -async def test_is_at_home_on_apify(monkeypatch: pytest.MonkeyPatch) -> None: +async def 
test_actor_is_at_home_on_apify(monkeypatch: pytest.MonkeyPatch) -> None: print('setenv') monkeypatch.setenv(ApifyEnvVars.IS_AT_HOME, 'true') async with Actor as actor: @@ -37,7 +37,7 @@ async def test_is_at_home_on_apify(monkeypatch: pytest.MonkeyPatch) -> None: assert is_at_home is True -async def test_get_env_use_env_vars(monkeypatch: pytest.MonkeyPatch) -> None: +async def test_get_env_with_randomized_env_vars(monkeypatch: pytest.MonkeyPatch) -> None: ignored_env_vars = { ApifyEnvVars.INPUT_KEY, ApifyEnvVars.MEMORY_MBYTES, diff --git a/tests/unit/actor/test_actor_helpers.py b/tests/unit/actor/test_actor_helpers.py index f64ef43f..f71cd44c 100644 --- a/tests/unit/actor/test_actor_helpers.py +++ b/tests/unit/actor/test_actor_helpers.py @@ -1,6 +1,8 @@ from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar +from typing import TYPE_CHECKING + +import pytest from apify_client import ApifyClientAsync from apify_shared.consts import ApifyEnvVars, WebhookEventType @@ -9,32 +11,12 @@ from apify._actor import _ActorType if TYPE_CHECKING: - import pytest - from ..conftest import ApifyClientAsyncPatcher -class TestActorNewClient: - async def test_actor_new_client_config(self: TestActorNewClient, monkeypatch: pytest.MonkeyPatch) -> None: - token = 'my-token' - monkeypatch.setenv(ApifyEnvVars.TOKEN, token) - my_actor = _ActorType() - await my_actor.init() - - client = my_actor.new_client() - assert isinstance(client, ApifyClientAsync) - assert client.token == token - - passed_token = 'my-passed-token' - client_with_token = my_actor.new_client(token=passed_token) - assert isinstance(client_with_token, ApifyClientAsync) - assert client_with_token.token == passed_token - - await my_actor.exit() - - -class TestActorCallStartAbortActor: - FAKE_ACTOR_RUN: ClassVar = { +@pytest.fixture +def fake_actor_run() -> dict: + return { 'id': 'asdfasdf', 'buildId': '3ads35', 'buildNumber': '3.4.5', @@ -73,123 +55,122 @@ class TestActorCallStartAbortActor: }, } - async 
def test_actor_call( - self: TestActorCallStartAbortActor, - apify_client_async_patcher: ApifyClientAsyncPatcher, - ) -> None: - apify_client_async_patcher.patch('actor', 'call', return_value=self.FAKE_ACTOR_RUN) - actor_id = 'some-actor-id' - - async with Actor: - await Actor.call(actor_id) - - assert len(apify_client_async_patcher.calls['actor']['call']) == 1 - # The first argument is ActorClientAsync, which was called, let's check its id. - assert apify_client_async_patcher.calls['actor']['call'][0][0][0].resource_id == actor_id - - async def test_actor_call_task( - self: TestActorCallStartAbortActor, - apify_client_async_patcher: ApifyClientAsyncPatcher, - ) -> None: - apify_client_async_patcher.patch('task', 'call', return_value=self.FAKE_ACTOR_RUN) - task_id = 'some-task-id' - - async with Actor: - await Actor.call_task(task_id) - - assert len(apify_client_async_patcher.calls['task']['call']) == 1 - assert apify_client_async_patcher.calls['task']['call'][0][0][0].resource_id == task_id - - async def test_actor_start( - self: TestActorCallStartAbortActor, - apify_client_async_patcher: ApifyClientAsyncPatcher, - ) -> None: - apify_client_async_patcher.patch('actor', 'start', return_value=self.FAKE_ACTOR_RUN) - actor_id = 'some-id' - - async with Actor: - await Actor.start(actor_id) - - assert len(apify_client_async_patcher.calls['actor']['start']) == 1 - assert apify_client_async_patcher.calls['actor']['start'][0][0][0].resource_id == actor_id - - async def test_actor_abort( - self: TestActorCallStartAbortActor, - apify_client_async_patcher: ApifyClientAsyncPatcher, - ) -> None: - apify_client_async_patcher.patch('run', 'abort', return_value=self.FAKE_ACTOR_RUN) - run_id = 'some-run-id' - - async with Actor: - await Actor.abort(run_id) - - assert len(apify_client_async_patcher.calls['run']['abort']) == 1 - assert apify_client_async_patcher.calls['run']['abort'][0][0][0].resource_id == run_id - - -class TestActorMethodsWorksOnlyOnPlatform: - # NOTE: These 
methods will be tested properly using integrations tests. - - async def test_actor_metamorpth_not_work_locally( - self: TestActorMethodsWorksOnlyOnPlatform, - caplog: pytest.LogCaptureFixture, - ) -> None: - caplog.set_level('WARNING') - async with Actor: - await Actor.metamorph('random-id') - - assert len(caplog.records) == 1 - assert caplog.records[0].levelname == 'ERROR' - assert 'Actor.metamorph() is only supported when running on the Apify platform.' in caplog.records[0].message - - async def test_actor_reboot_not_work_locally( - self: TestActorMethodsWorksOnlyOnPlatform, - caplog: pytest.LogCaptureFixture, - ) -> None: - caplog.set_level('WARNING') - async with Actor: - await Actor.reboot() - - assert len(caplog.records) == 1 - assert caplog.records[0].levelname == 'ERROR' - assert 'Actor.reboot() is only supported when running on the Apify platform.' in caplog.records[0].message - - async def test_actor_add_webhook_not_work_locally( - self: TestActorMethodsWorksOnlyOnPlatform, - caplog: pytest.LogCaptureFixture, - ) -> None: - caplog.set_level('WARNING') - async with Actor: - await Actor.add_webhook( - Webhook(event_types=[WebhookEventType.ACTOR_BUILD_ABORTED], request_url='https://example.com') - ) - - assert len(caplog.records) == 1 - assert caplog.records[0].levelname == 'ERROR' - assert 'Actor.add_webhook() is only supported when running on the Apify platform.' 
in caplog.records[0].message - - async def test_actor_set_status_message_mock_locally( - self: TestActorMethodsWorksOnlyOnPlatform, - caplog: pytest.LogCaptureFixture, - ) -> None: - caplog.set_level('INFO') - async with Actor: - await Actor.set_status_message('test-status-message') - - matching_records = [record for record in caplog.records if 'test-status-message' in record.message] - assert len(matching_records) == 1 - assert matching_records[0].levelname == 'INFO' - assert '[Status message]: test-status-message' in matching_records[0].message - - async def test_actor_set_status_message_terminal_mock_locally( - self: TestActorMethodsWorksOnlyOnPlatform, - caplog: pytest.LogCaptureFixture, - ) -> None: - caplog.set_level('INFO') - async with Actor: - await Actor.fail(status_message='test-terminal-message') - - matching_records = [record for record in caplog.records if 'test-terminal-message' in record.message] - assert len(matching_records) == 1 - assert matching_records[0].levelname == 'INFO' - assert '[Terminal status message]: test-terminal-message' in matching_records[0].message + +async def test_new_client_config_creation(monkeypatch: pytest.MonkeyPatch) -> None: + token = 'my-token' + monkeypatch.setenv(ApifyEnvVars.TOKEN, token) + my_actor = _ActorType() + await my_actor.init() + + client = my_actor.new_client() + assert isinstance(client, ApifyClientAsync) + assert client.token == token + + passed_token = 'my-passed-token' + client_with_token = my_actor.new_client(token=passed_token) + assert isinstance(client_with_token, ApifyClientAsync) + assert client_with_token.token == passed_token + + await my_actor.exit() + + +async def test_call_actor(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: dict) -> None: + apify_client_async_patcher.patch('actor', 'call', return_value=fake_actor_run) + actor_id = 'some-actor-id' + + async with Actor: + await Actor.call(actor_id) + + assert len(apify_client_async_patcher.calls['actor']['call']) == 1 + 
# The first argument is ActorClientAsync, which was called, let's check its id.
+    assert apify_client_async_patcher.calls['actor']['call'][0][0][0].resource_id == actor_id
+
+
+async def test_call_actor_task(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: dict) -> None:
+    apify_client_async_patcher.patch('task', 'call', return_value=fake_actor_run)
+    task_id = 'some-task-id'
+
+    async with Actor:
+        await Actor.call_task(task_id)
+
+    assert len(apify_client_async_patcher.calls['task']['call']) == 1
+    assert apify_client_async_patcher.calls['task']['call'][0][0][0].resource_id == task_id
+
+
+async def test_start_actor(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: dict) -> None:
+    apify_client_async_patcher.patch('actor', 'start', return_value=fake_actor_run)
+    actor_id = 'some-id'
+
+    async with Actor:
+        await Actor.start(actor_id)
+
+    assert len(apify_client_async_patcher.calls['actor']['start']) == 1
+    assert apify_client_async_patcher.calls['actor']['start'][0][0][0].resource_id == actor_id
+
+
+async def test_abort_actor_run(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: dict) -> None:
+    apify_client_async_patcher.patch('run', 'abort', return_value=fake_actor_run)
+    run_id = 'some-run-id'
+
+    async with Actor:
+        await Actor.abort(run_id)
+
+    assert len(apify_client_async_patcher.calls['run']['abort']) == 1
+    assert apify_client_async_patcher.calls['run']['abort'][0][0][0].resource_id == run_id
+
+
+# NOTE: The following methods are properly tested using integration tests.
+
+
+async def test_metamorph_fails_locally(caplog: pytest.LogCaptureFixture) -> None:
+    caplog.set_level('WARNING')
+    async with Actor:
+        await Actor.metamorph('random-id')
+
+    assert len(caplog.records) == 1
+    assert caplog.records[0].levelname == 'ERROR'
+    assert 'Actor.metamorph() is only supported when running on the Apify platform.' 
in caplog.records[0].message + + +async def test_reboot_fails_locally(caplog: pytest.LogCaptureFixture) -> None: + caplog.set_level('WARNING') + async with Actor: + await Actor.reboot() + + assert len(caplog.records) == 1 + assert caplog.records[0].levelname == 'ERROR' + assert 'Actor.reboot() is only supported when running on the Apify platform.' in caplog.records[0].message + + +async def test_add_webhook_fails_locally(caplog: pytest.LogCaptureFixture) -> None: + caplog.set_level('WARNING') + async with Actor: + await Actor.add_webhook( + Webhook(event_types=[WebhookEventType.ACTOR_BUILD_ABORTED], request_url='https://example.com') + ) + + assert len(caplog.records) == 1 + assert caplog.records[0].levelname == 'ERROR' + assert 'Actor.add_webhook() is only supported when running on the Apify platform.' in caplog.records[0].message + + +async def test_set_status_message_locally(caplog: pytest.LogCaptureFixture) -> None: + caplog.set_level('INFO') + async with Actor: + await Actor.set_status_message('test-status-message') + + matching_records = [record for record in caplog.records if 'test-status-message' in record.message] + assert len(matching_records) == 1 + assert matching_records[0].levelname == 'INFO' + assert '[Status message]: test-status-message' in matching_records[0].message + + +async def test_set_terminal_status_message_locally(caplog: pytest.LogCaptureFixture) -> None: + caplog.set_level('INFO') + async with Actor: + await Actor.fail(status_message='test-terminal-message') + + matching_records = [record for record in caplog.records if 'test-terminal-message' in record.message] + assert len(matching_records) == 1 + assert matching_records[0].levelname == 'INFO' + assert '[Terminal status message]: test-terminal-message' in matching_records[0].message diff --git a/tests/unit/actor/test_actor_key_value_store.py b/tests/unit/actor/test_actor_key_value_store.py index b6ece50d..42d6b2d4 100644 --- a/tests/unit/actor/test_actor_key_value_store.py +++ 
b/tests/unit/actor/test_actor_key_value_store.py @@ -18,77 +18,77 @@ # NOTE: We only test the key-value store methods available on Actor class/instance. # Actual tests for the implementations are in storages/. -class TestOpenKeyValueStore: - async def test_same_references(self: TestOpenKeyValueStore) -> None: - async with Actor: - kvs1 = await Actor.open_key_value_store() - kvs2 = await Actor.open_key_value_store() - assert kvs1 is kvs2 - - kvs_name = 'non-default' - kvs_by_name_1 = await Actor.open_key_value_store(name=kvs_name) - kvs_by_name_2 = await Actor.open_key_value_store(name=kvs_name) - assert kvs_by_name_1 is kvs_by_name_2 - - kvs_by_id_1 = await Actor.open_key_value_store(id=kvs_by_name_1._id) - kvs_by_id_2 = await Actor.open_key_value_store(id=kvs_by_name_1._id) - assert kvs_by_id_1 is kvs_by_name_1 - assert kvs_by_id_2 is kvs_by_id_1 - - -class TestKeyValueStoreOnActor: - async def test_throws_without_init(self: TestKeyValueStoreOnActor) -> None: - with pytest.raises(RuntimeError): - await Actor.open_key_value_store() - - async def test_get_set_value(self: TestKeyValueStoreOnActor) -> None: - test_key = 'test_key' - test_value = 'test_value' - test_content_type = 'text/plain' - async with Actor as my_actor: - await my_actor.set_value(key=test_key, value=test_value, content_type=test_content_type) - value = await my_actor.get_value(key=test_key) - assert value == test_value - - async def test_get_input(self: TestKeyValueStoreOnActor, memory_storage_client: MemoryStorageClient) -> None: - input_key = 'INPUT' - test_input = {'foo': 'bar'} - - await memory_storage_client.key_value_stores().get_or_create(id='default') - await memory_storage_client.key_value_store('default').set_record( - key=input_key, - value=json_dumps(test_input), - content_type='application/json', - ) - - async with Actor as my_actor: - input = await my_actor.get_input() # noqa: A001 - assert input['foo'] == test_input['foo'] - - async def test_get_input_with_secrets( - self: 
TestKeyValueStoreOnActor, - monkeypatch: pytest.MonkeyPatch, - memory_storage_client: MemoryStorageClient, - ) -> None: - monkeypatch.setenv(ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_FILE, PRIVATE_KEY_PEM_BASE64) - monkeypatch.setenv(ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE, PRIVATE_KEY_PASSWORD) - - input_key = 'INPUT' - secret_string = 'secret-string' - encrypted_secret = public_encrypt(secret_string, public_key=PUBLIC_KEY) - input_with_secret = { - 'foo': 'bar', - 'secret': f'{ENCRYPTED_INPUT_VALUE_PREFIX}:{encrypted_secret["encrypted_password"]}:{encrypted_secret["encrypted_value"]}', # noqa: E501 - } - - await memory_storage_client.key_value_stores().get_or_create(id='default') - await memory_storage_client.key_value_store('default').set_record( - key=input_key, - value=json_dumps(input_with_secret), - content_type='application/json', - ) - - async with Actor as my_actor: - input = await my_actor.get_input() # noqa: A001 - assert input['foo'] == input_with_secret['foo'] - assert input['secret'] == secret_string +async def test_open_returns_same_references() -> None: + async with Actor: + kvs1 = await Actor.open_key_value_store() + kvs2 = await Actor.open_key_value_store() + assert kvs1 is kvs2 + + kvs_name = 'non-default' + kvs_by_name_1 = await Actor.open_key_value_store(name=kvs_name) + kvs_by_name_2 = await Actor.open_key_value_store(name=kvs_name) + assert kvs_by_name_1 is kvs_by_name_2 + + kvs_by_id_1 = await Actor.open_key_value_store(id=kvs_by_name_1._id) + kvs_by_id_2 = await Actor.open_key_value_store(id=kvs_by_name_1._id) + assert kvs_by_id_1 is kvs_by_name_1 + assert kvs_by_id_2 is kvs_by_id_1 + + +async def test_open_throws_without_init() -> None: + with pytest.raises(RuntimeError): + await Actor.open_key_value_store() + + +async def test_set_and_get_value() -> None: + test_key = 'test_key' + test_value = 'test_value' + test_content_type = 'text/plain' + async with Actor as my_actor: + await my_actor.set_value(key=test_key, value=test_value, 
content_type=test_content_type) + value = await my_actor.get_value(key=test_key) + assert value == test_value + + +async def test_get_input(memory_storage_client: MemoryStorageClient) -> None: + input_key = 'INPUT' + test_input = {'foo': 'bar'} + + await memory_storage_client.key_value_stores().get_or_create(id='default') + await memory_storage_client.key_value_store('default').set_record( + key=input_key, + value=json_dumps(test_input), + content_type='application/json', + ) + + async with Actor as my_actor: + input = await my_actor.get_input() # noqa: A001 + assert input['foo'] == test_input['foo'] + + +async def test_get_input_with_encrypted_secrets( + monkeypatch: pytest.MonkeyPatch, + memory_storage_client: MemoryStorageClient, +) -> None: + monkeypatch.setenv(ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_FILE, PRIVATE_KEY_PEM_BASE64) + monkeypatch.setenv(ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE, PRIVATE_KEY_PASSWORD) + + input_key = 'INPUT' + secret_string = 'secret-string' + encrypted_secret = public_encrypt(secret_string, public_key=PUBLIC_KEY) + input_with_secret = { + 'foo': 'bar', + 'secret': f'{ENCRYPTED_INPUT_VALUE_PREFIX}:{encrypted_secret["encrypted_password"]}:{encrypted_secret["encrypted_value"]}', # noqa: E501 + } + + await memory_storage_client.key_value_stores().get_or_create(id='default') + await memory_storage_client.key_value_store('default').set_record( + key=input_key, + value=json_dumps(input_with_secret), + content_type='application/json', + ) + + async with Actor as my_actor: + input = await my_actor.get_input() # noqa: A001 + assert input['foo'] == input_with_secret['foo'] + assert input['secret'] == secret_string diff --git a/tests/unit/actor/test_actor_lifecycle.py b/tests/unit/actor/test_actor_lifecycle.py index 8053f2f3..eb0e41e9 100644 --- a/tests/unit/actor/test_actor_lifecycle.py +++ b/tests/unit/actor/test_actor_lifecycle.py @@ -17,146 +17,148 @@ from apify._actor import _ActorType -class TestActorInit: - async def 
test_async_with_actor_properly_initialize(self: TestActorInit) -> None: - async with Actor: - assert cast(Proxy, apify._actor.Actor).__wrapped__ is not None - assert cast(Proxy, apify._actor.Actor).__wrapped__._is_initialized - assert not cast(Proxy, apify._actor.Actor).__wrapped__._is_initialized +async def test_actor_properly_init_with_async() -> None: + async with Actor: + assert cast(Proxy, apify._actor.Actor).__wrapped__ is not None + assert cast(Proxy, apify._actor.Actor).__wrapped__._is_initialized + assert not cast(Proxy, apify._actor.Actor).__wrapped__._is_initialized - async def test_actor_init(self: TestActorInit) -> None: - my_actor = _ActorType() - await my_actor.init() - assert my_actor._is_initialized is True +async def test_actor_init() -> None: + my_actor = _ActorType() + + await my_actor.init() + assert my_actor._is_initialized is True - await my_actor.exit() - assert my_actor._is_initialized is False + await my_actor.exit() + assert my_actor._is_initialized is False - async def test_double_init(self: TestActorInit) -> None: - my_actor = _ActorType() +async def test_double_init_raises_error() -> None: + my_actor = _ActorType() + + await my_actor.init() + with pytest.raises(RuntimeError): await my_actor.init() - with pytest.raises(RuntimeError): - await my_actor.init() - await my_actor.exit() + await my_actor.exit() + await Actor.init() + with pytest.raises(RuntimeError): await Actor.init() - with pytest.raises(RuntimeError): - await Actor.init() + await Actor.exit() + + +async def test_actor_exits_cleanly_with_events(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv(ApifyEnvVars.SYSTEM_INFO_INTERVAL_MILLIS, '100') + monkeypatch.setenv(ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS, '100') + on_persist = [] + on_system_info = [] + + def on_event(event_type: Event) -> Callable: + nonlocal on_persist + nonlocal on_system_info + if event_type == Event.PERSIST_STATE: + return lambda data: on_persist.append(data) + if event_type == 
Event.SYSTEM_INFO: + return lambda data: on_system_info.append(data) + return lambda data: print(data) + + my_actor = _ActorType() + async with my_actor: + assert my_actor._is_initialized + my_actor.on(Event.PERSIST_STATE, on_event(Event.PERSIST_STATE)) + my_actor.on(Event.SYSTEM_INFO, on_event(Event.SYSTEM_INFO)) + await asyncio.sleep(1) + + on_persist_count = len(on_persist) + on_system_info_count = len(on_system_info) + assert on_persist_count != 0 + assert on_system_info_count != 0 + + # Check if events stopped emitting. + await asyncio.sleep(0.2) + assert on_persist_count == len(on_persist) + assert on_system_info_count == len(on_system_info) + + +async def test_exit_without_init_raises_error() -> None: + with pytest.raises(RuntimeError): await Actor.exit() -class TestActorExit: - async def test_with_actor_exit(self: TestActorExit, monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setenv(ApifyEnvVars.SYSTEM_INFO_INTERVAL_MILLIS, '100') - monkeypatch.setenv(ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS, '100') - on_persist = [] - on_system_info = [] - - def on_event(event_type: Event) -> Callable: - nonlocal on_persist - nonlocal on_system_info - if event_type == Event.PERSIST_STATE: - return lambda data: on_persist.append(data) - if event_type == Event.SYSTEM_INFO: - return lambda data: on_system_info.append(data) - return lambda data: print(data) - - my_actor = _ActorType() - async with my_actor: +async def test_actor_fails_cleanly() -> None: + async with _ActorType() as my_actor: + assert my_actor._is_initialized + await my_actor.fail() + assert my_actor._is_initialized is False + + +async def test_actor_handles_failure_gracefully() -> None: + my_actor = None + + with contextlib.suppress(Exception): + async with _ActorType() as my_actor: assert my_actor._is_initialized - my_actor.on(Event.PERSIST_STATE, on_event(Event.PERSIST_STATE)) - my_actor.on(Event.SYSTEM_INFO, on_event(Event.SYSTEM_INFO)) - await asyncio.sleep(1) + raise Exception('Failed') # noqa: 
TRY002 - on_persist_count = len(on_persist) - on_system_info_count = len(on_system_info) - assert on_persist_count != 0 - assert on_system_info_count != 0 + assert my_actor is not None + assert my_actor._is_initialized is False - # Check if events stopped emitting. - await asyncio.sleep(0.2) - assert on_persist_count == len(on_persist) - assert on_system_info_count == len(on_system_info) - async def test_raise_on_exit_without_init(self: TestActorExit) -> None: - with pytest.raises(RuntimeError): - await Actor.exit() +async def test_fail_without_init_raises_error() -> None: + with pytest.raises(RuntimeError): + await Actor.fail() -class TestActorFail: - async def test_with_actor_fail(self: TestActorFail) -> None: - async with _ActorType() as my_actor: - assert my_actor._is_initialized - await my_actor.fail() - assert my_actor._is_initialized is False - - async def test_with_actor_failed(self: TestActorFail) -> None: - my_actor = None - - with contextlib.suppress(Exception): - async with _ActorType() as my_actor: - assert my_actor._is_initialized - raise Exception('Failed') # noqa: TRY002 - - assert my_actor is not None - assert my_actor._is_initialized is False - - async def test_raise_on_fail_without_init(self: TestActorFail) -> None: - with pytest.raises(RuntimeError): - await Actor.fail() - - async def test_actor_reboot_not_work_locally(self: TestActorFail) -> None: - with pytest.raises(RuntimeError): - await Actor.reboot() - - -class TestMigratingEvent: - async def test_migrating_event(self: TestMigratingEvent, monkeypatch: pytest.MonkeyPatch) -> None: - # This should test whether when you get a MIGRATING event, - # the Actor automatically emits the PERSIST_STATE event with data `{'isMigrating': True}` - monkeypatch.setenv(ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS, '500') - monkeypatch.setenv(ApifyEnvVars.IS_AT_HOME, '1') - - persist_state_events_data = [] - - def log_persist_state(data: Any) -> None: - nonlocal persist_state_events_data - 
persist_state_events_data.append(data) - - async def handler(websocket: websockets.server.WebSocketServerProtocol) -> None: - await websocket.wait_closed() - - async with websockets.server.serve(handler, host='localhost') as ws_server: - port: int = ws_server.sockets[0].getsockname()[1] # type: ignore[index] - monkeypatch.setenv(ApifyEnvVars.ACTOR_EVENTS_WS_URL, f'ws://localhost:{port}') - - async with Actor: - Actor.on(Event.PERSIST_STATE, log_persist_state) - await asyncio.sleep(2) - - for socket in ws_server.websockets: - await socket.send( - json.dumps( - { - 'name': 'migrating', - 'data': { - 'isMigrating': True, - }, - } - ) +async def test_actor_reboot_fails_locally() -> None: + with pytest.raises(RuntimeError): + await Actor.reboot() + + +async def test_actor_handles_migrating_event_correctly(monkeypatch: pytest.MonkeyPatch) -> None: + # This should test whether when you get a MIGRATING event, + # the Actor automatically emits the PERSIST_STATE event with data `{'isMigrating': True}` + monkeypatch.setenv(ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS, '500') + monkeypatch.setenv(ApifyEnvVars.IS_AT_HOME, '1') + + persist_state_events_data = [] + + def log_persist_state(data: Any) -> None: + nonlocal persist_state_events_data + persist_state_events_data.append(data) + + async def handler(websocket: websockets.server.WebSocketServerProtocol) -> None: + await websocket.wait_closed() + + async with websockets.server.serve(handler, host='localhost') as ws_server: + port: int = ws_server.sockets[0].getsockname()[1] # type: ignore[index] + monkeypatch.setenv(ApifyEnvVars.ACTOR_EVENTS_WS_URL, f'ws://localhost:{port}') + + async with Actor: + Actor.on(Event.PERSIST_STATE, log_persist_state) + await asyncio.sleep(2) + + for socket in ws_server.websockets: + await socket.send( + json.dumps( + { + 'name': 'migrating', + 'data': { + 'isMigrating': True, + }, + } ) + ) - await asyncio.sleep(1) + await asyncio.sleep(1) - assert len(persist_state_events_data) >= 3 + assert 
len(persist_state_events_data) >= 3 - print(persist_state_events_data) + print(persist_state_events_data) - # Check if the last event is from the migration - assert persist_state_events_data.pop() == EventPersistStateData(is_migrating=True) + # Check if the last event is from the migration + assert persist_state_events_data.pop() == EventPersistStateData(is_migrating=True) - # Check if all the other events are regular persist state events - for event_data in persist_state_events_data: - assert event_data == EventPersistStateData(is_migrating=False) + # Check if all the other events are regular persist state events + for event_data in persist_state_events_data: + assert event_data == EventPersistStateData(is_migrating=False) diff --git a/tests/unit/actor/test_actor_log.py b/tests/unit/actor/test_actor_log.py index 083724d3..41217486 100644 --- a/tests/unit/actor/test_actor_log.py +++ b/tests/unit/actor/test_actor_log.py @@ -14,83 +14,81 @@ import pytest -class TestActorLog: - async def test_actor_log( - self: TestActorLog, - caplog: pytest.LogCaptureFixture, - monkeypatch: pytest.MonkeyPatch, - ) -> None: - caplog.set_level(logging.DEBUG, logger='apify') - monkeypatch.setenv('APIFY_IS_AT_HOME', '1') - - with contextlib.suppress(RuntimeError): - async with Actor(configure_logging=False): - # Test Actor.log - Actor.log.debug('Debug message') - Actor.log.info('Info message') - - # Test logger - logger.warning('Warning message') - logger.error('Error message') - - # Test that exception is logged with the traceback - try: - raise ValueError('Dummy ValueError') - except Exception: - Actor.log.exception('Exception message') - - # Test multiline message being indented correctly - logger.info('Multi\nline\nlog\nmessage') - - # Test that exception in Actor.main is logged with the traceback - raise RuntimeError('Dummy RuntimeError') - - assert len(caplog.records) == 12 - - assert caplog.records[0].levelno == logging.INFO - assert caplog.records[0].message == 'Initializing 
Actor...' - - assert caplog.records[1].levelno == logging.INFO - assert caplog.records[1].message == 'System info' - assert getattr(caplog.records[1], 'apify_sdk_version', None) == __version__ - assert getattr(caplog.records[1], 'apify_client_version', None) == apify_client_version - assert getattr(caplog.records[1], 'python_version', None) == '.'.join([str(x) for x in sys.version_info[:3]]) - assert getattr(caplog.records[1], 'os', None) == sys.platform - - assert caplog.records[2].levelno == logging.DEBUG - assert caplog.records[2].message.startswith('APIFY_ACTOR_EVENTS_WS_URL env var not set') - - assert caplog.records[3].levelno == logging.DEBUG - assert caplog.records[3].message == 'Debug message' - - assert caplog.records[4].levelno == logging.INFO - assert caplog.records[4].message == 'Info message' - - assert caplog.records[5].levelno == logging.WARNING - assert caplog.records[5].message == 'Warning message' - - assert caplog.records[6].levelno == logging.ERROR - assert caplog.records[6].message == 'Error message' - - assert caplog.records[7].levelno == logging.ERROR - assert caplog.records[7].message == 'Exception message' - assert caplog.records[7].exc_info is not None - assert caplog.records[7].exc_info[0] is ValueError - assert isinstance(caplog.records[7].exc_info[1], ValueError) - assert str(caplog.records[7].exc_info[1]) == 'Dummy ValueError' - - assert caplog.records[8].levelno == logging.INFO - assert caplog.records[8].message == 'Multi\nline\nlog\nmessage' - - assert caplog.records[9].levelno == logging.ERROR - assert caplog.records[9].message == 'Actor failed with an exception' - assert caplog.records[9].exc_info is not None - assert caplog.records[9].exc_info[0] is RuntimeError - assert isinstance(caplog.records[9].exc_info[1], RuntimeError) - assert str(caplog.records[9].exc_info[1]) == 'Dummy RuntimeError' - - assert caplog.records[10].levelno == logging.INFO - assert caplog.records[10].message == 'Exiting Actor' - - assert 
caplog.records[11].levelno == logging.DEBUG - assert caplog.records[11].message == 'Not calling sys.exit(91) because Actor is running in an unit test' +async def test_actor_logs_messages_correctly( + caplog: pytest.LogCaptureFixture, + monkeypatch: pytest.MonkeyPatch, +) -> None: + caplog.set_level(logging.DEBUG, logger='apify') + monkeypatch.setenv('APIFY_IS_AT_HOME', '1') + + with contextlib.suppress(RuntimeError): + async with Actor(configure_logging=False): + # Test Actor.log + Actor.log.debug('Debug message') + Actor.log.info('Info message') + + # Test logger + logger.warning('Warning message') + logger.error('Error message') + + # Test that exception is logged with the traceback + try: + raise ValueError('Dummy ValueError') + except Exception: + Actor.log.exception('Exception message') + + # Test multiline message being indented correctly + logger.info('Multi\nline\nlog\nmessage') + + # Test that exception in Actor.main is logged with the traceback + raise RuntimeError('Dummy RuntimeError') + + assert len(caplog.records) == 12 + + assert caplog.records[0].levelno == logging.INFO + assert caplog.records[0].message == 'Initializing Actor...' 
+ + assert caplog.records[1].levelno == logging.INFO + assert caplog.records[1].message == 'System info' + assert getattr(caplog.records[1], 'apify_sdk_version', None) == __version__ + assert getattr(caplog.records[1], 'apify_client_version', None) == apify_client_version + assert getattr(caplog.records[1], 'python_version', None) == '.'.join([str(x) for x in sys.version_info[:3]]) + assert getattr(caplog.records[1], 'os', None) == sys.platform + + assert caplog.records[2].levelno == logging.DEBUG + assert caplog.records[2].message.startswith('APIFY_ACTOR_EVENTS_WS_URL env var not set') + + assert caplog.records[3].levelno == logging.DEBUG + assert caplog.records[3].message == 'Debug message' + + assert caplog.records[4].levelno == logging.INFO + assert caplog.records[4].message == 'Info message' + + assert caplog.records[5].levelno == logging.WARNING + assert caplog.records[5].message == 'Warning message' + + assert caplog.records[6].levelno == logging.ERROR + assert caplog.records[6].message == 'Error message' + + assert caplog.records[7].levelno == logging.ERROR + assert caplog.records[7].message == 'Exception message' + assert caplog.records[7].exc_info is not None + assert caplog.records[7].exc_info[0] is ValueError + assert isinstance(caplog.records[7].exc_info[1], ValueError) + assert str(caplog.records[7].exc_info[1]) == 'Dummy ValueError' + + assert caplog.records[8].levelno == logging.INFO + assert caplog.records[8].message == 'Multi\nline\nlog\nmessage' + + assert caplog.records[9].levelno == logging.ERROR + assert caplog.records[9].message == 'Actor failed with an exception' + assert caplog.records[9].exc_info is not None + assert caplog.records[9].exc_info[0] is RuntimeError + assert isinstance(caplog.records[9].exc_info[1], RuntimeError) + assert str(caplog.records[9].exc_info[1]) == 'Dummy RuntimeError' + + assert caplog.records[10].levelno == logging.INFO + assert caplog.records[10].message == 'Exiting Actor' + + assert caplog.records[11].levelno == 
logging.DEBUG + assert caplog.records[11].message == 'Not calling sys.exit(91) because Actor is running in an unit test' diff --git a/tests/unit/actor/test_actor_non_default_instance.py b/tests/unit/actor/test_actor_non_default_instance.py index e9d34a0b..68e380aa 100644 --- a/tests/unit/actor/test_actor_non_default_instance.py +++ b/tests/unit/actor/test_actor_non_default_instance.py @@ -3,6 +3,8 @@ from apify import Actor, Configuration -async def test_actor_non_default_instance() -> None: - async with Actor(Configuration(internal_timeout=timedelta(minutes=111))) as actor: +async def test_actor_with_non_default_config() -> None: + config = Configuration(internal_timeout=timedelta(minutes=111)) + + async with Actor(config) as actor: assert actor.config.internal_timeout == timedelta(minutes=111) diff --git a/tests/unit/actor/test_actor_request_queue.py b/tests/unit/actor/test_actor_request_queue.py index de58d26c..5504715f 100644 --- a/tests/unit/actor/test_actor_request_queue.py +++ b/tests/unit/actor/test_actor_request_queue.py @@ -7,23 +7,23 @@ # NOTE: We only test the references here. 
Actual tests for the implementations are in storages/ -class TestActorOpenRequestQueue: - async def test_throws_without_init(self: TestActorOpenRequestQueue) -> None: - with pytest.raises(RuntimeError): - await Actor.open_request_queue() - - async def test_same_references(self: TestActorOpenRequestQueue) -> None: - async with Actor: - rq1 = await Actor.open_request_queue() - rq2 = await Actor.open_request_queue() - assert rq1 is rq2 - - rq_name = 'non-default' - rq_by_name_1 = await Actor.open_key_value_store(name=rq_name) - rq_by_name_2 = await Actor.open_key_value_store(name=rq_name) - assert rq_by_name_1 is rq_by_name_2 - - rq_by_id_1 = await Actor.open_key_value_store(id=rq_by_name_1._id) - rq_by_id_2 = await Actor.open_key_value_store(id=rq_by_name_1._id) - assert rq_by_id_1 is rq_by_name_1 - assert rq_by_id_2 is rq_by_id_1 +async def test_open_throws_without_init() -> None: + with pytest.raises(RuntimeError): + await Actor.open_request_queue() + + +async def test_open_returns_same_references() -> None: + async with Actor: + rq1 = await Actor.open_request_queue() + rq2 = await Actor.open_request_queue() + assert rq1 is rq2 + + rq_name = 'non-default' + rq_by_name_1 = await Actor.open_key_value_store(name=rq_name) + rq_by_name_2 = await Actor.open_key_value_store(name=rq_name) + assert rq_by_name_1 is rq_by_name_2 + + rq_by_id_1 = await Actor.open_key_value_store(id=rq_by_name_1._id) + rq_by_id_2 = await Actor.open_key_value_store(id=rq_by_name_1._id) + assert rq_by_id_1 is rq_by_name_1 + assert rq_by_id_2 is rq_by_id_1 diff --git a/tests/unit/scrapy/middlewares/test_apify_proxy.py b/tests/unit/scrapy/middlewares/test_apify_proxy.py index 9d089acc..41b21fa3 100644 --- a/tests/unit/scrapy/middlewares/test_apify_proxy.py +++ b/tests/unit/scrapy/middlewares/test_apify_proxy.py @@ -60,8 +60,17 @@ def proxy_configuration() -> ProxyConfiguration: ({'APIFY_PROXY_SETTINGS': {'useApifyProxy': None}}, NotConfigured), ({'APIFY_PROXY_SETTINGS': {'useApifyProxy': False}}, 
NotConfigured), ], + ids=[ + 'valid_proxy_settings_enabled', + 'valid_proxy_settings_with_groups', + 'no_settings', + 'irrelevant_setting', + 'empty_proxy_settings', + 'use_apify_proxy_none', + 'use_apify_proxy_false', + ], ) -def test__from_crawler( +def test_initialization_from_crawler( crawler: Crawler, monkeypatch: pytest.MonkeyPatch, settings: dict, @@ -81,8 +90,9 @@ def test__from_crawler( @pytest.mark.parametrize( 'expected_proxy_url', ['http://username:password@proxy.example.com:8080', 'http://hsdfgds:52354325@proxy.apify.com:5748'], + ids=['example_proxy', 'apify_proxy'], ) -async def test__get_new_proxy_url( +async def test_retrieves_new_proxy_url( monkeypatch: pytest.MonkeyPatch, middleware: ApifyHttpProxyMiddleware, proxy_configuration: ProxyConfiguration, @@ -104,8 +114,9 @@ async def mock_new_url() -> str: ('http://user123:pass456@proxy.apify.com:5748', None, b'Basic dXNlcjEyMzpwYXNzNDU2'), ('http://@proxy.example.com:2943', ValueError, b''), ], + ids=['valid_example_proxy', 'valid_apify_proxy', 'invalid_proxy_missing_credentials'], ) -async def test__process_request( +async def test_process_request_with_proxy( monkeypatch: pytest.MonkeyPatch, middleware: ApifyHttpProxyMiddleware, spider: DummySpider, @@ -131,8 +142,9 @@ async def mock_get_new_proxy_url() -> ParseResult: @pytest.mark.parametrize( 'exception', [TunnelError(), ValueError()], + ids=['tunnel_error', 'value_error'], ) -def test__process_exception( +def test_handles_exceptions( middleware: ApifyHttpProxyMiddleware, spider: DummySpider, dummy_request: Request, diff --git a/tests/unit/scrapy/pipelines/test_actor_dataset_push.py b/tests/unit/scrapy/pipelines/test_actor_dataset_push.py index 0eb59599..6ef386aa 100644 --- a/tests/unit/scrapy/pipelines/test_actor_dataset_push.py +++ b/tests/unit/scrapy/pipelines/test_actor_dataset_push.py @@ -37,37 +37,38 @@ def pipeline() -> ActorDatasetPushPipeline: @dataclass(frozen=True) -class TestCase: +class ItemTestCase: item: Item item_dict: dict 
expected_exception: type[Exception] | None -test_cases = [ - TestCase( - item=DummyItem(a='string', b=123, c=False), - item_dict={'a': 'string', 'b': 123, 'c': False}, - expected_exception=None, - ), - TestCase( - item=TitleItem(url='https://example.com', title='Example'), - item_dict={'url': 'https://example.com', 'title': 'Example'}, - expected_exception=None, - ), - TestCase( - item=None, - item_dict={}, - expected_exception=TypeError, - ), -] - - -@pytest.mark.parametrize('tc', test_cases) -async def test__process_item( +@pytest.mark.parametrize( + 'tc', + [ + ItemTestCase( + item=DummyItem(a='string', b=123, c=False), + item_dict={'a': 'string', 'b': 123, 'c': False}, + expected_exception=None, + ), + ItemTestCase( + item=TitleItem(url='https://example.com', title='Example'), + item_dict={'url': 'https://example.com', 'title': 'Example'}, + expected_exception=None, + ), + ItemTestCase( + item=None, + item_dict={}, + expected_exception=TypeError, + ), + ], + ids=['dummy_item_with_valid_data', 'title_item_with_valid_data', 'none_item_raises_type_error'], +) +async def test_process_item( monkeypatch: pytest.MonkeyPatch, pipeline: ActorDatasetPushPipeline, spider: Spider, - tc: TestCase, + tc: ItemTestCase, ) -> None: dataset = [] diff --git a/tests/unit/scrapy/requests/test_to_apify_request.py b/tests/unit/scrapy/requests/test_to_apify_request.py index 3da084dd..b3f1d07e 100644 --- a/tests/unit/scrapy/requests/test_to_apify_request.py +++ b/tests/unit/scrapy/requests/test_to_apify_request.py @@ -19,7 +19,7 @@ def spider() -> DummySpider: return DummySpider() -def test__to_apify_request__simple(spider: Spider) -> None: +def test_creates_simple_request(spider: Spider) -> None: scrapy_request = Request(url='https://example.com') apify_request = to_apify_request(scrapy_request, spider) @@ -31,7 +31,7 @@ def test__to_apify_request__simple(spider: Spider) -> None: assert isinstance(user_data.get('scrapy_request'), str) -def test__to_apify_request__headers(spider: 
Spider) -> None: +def test_handles_headers(spider: Spider) -> None: scrapy_request_headers = Headers({'Authorization': 'Bearer access_token'}) scrapy_request = Request(url='https://example.com', headers=scrapy_request_headers) @@ -41,7 +41,7 @@ def test__to_apify_request__headers(spider: Spider) -> None: assert apify_request.headers == HttpHeaders(scrapy_request_headers.to_unicode_dict()) -def test__to_apify_request__without_id_and_unique_key(spider: Spider) -> None: +def test_without_id_and_unique_key(spider: Spider) -> None: scrapy_request = Request( url='https://example.com', method='GET', @@ -61,7 +61,7 @@ def test__to_apify_request__without_id_and_unique_key(spider: Spider) -> None: assert isinstance(user_data.get('scrapy_request'), str) -def test__to_apify_request__with_id_and_unique_key(spider: Spider) -> None: +def test_with_id_and_unique_key(spider: Spider) -> None: scrapy_request = Request( url='https://example.com', method='GET', @@ -87,7 +87,7 @@ def test__to_apify_request__with_id_and_unique_key(spider: Spider) -> None: assert isinstance(user_data.get('scrapy_request'), str) -def test__to_apify_request__invalid_scrapy_request(spider: Spider) -> None: +def test_invalid_scrapy_request_returns_none(spider: Spider) -> None: scrapy_request = 'invalid_request' apify_request = to_apify_request(scrapy_request, spider) diff --git a/tests/unit/scrapy/requests/test_to_scrapy_request.py b/tests/unit/scrapy/requests/test_to_scrapy_request.py index 5b72d380..3a436f51 100644 --- a/tests/unit/scrapy/requests/test_to_scrapy_request.py +++ b/tests/unit/scrapy/requests/test_to_scrapy_request.py @@ -21,7 +21,7 @@ def spider() -> DummySpider: return DummySpider() -def test__to_scrapy_request__without_reconstruction(spider: Spider) -> None: +def test_without_reconstruction(spider: Spider) -> None: # Without reconstruction of encoded Scrapy request apify_request = CrawleeRequest( url='https://example.com', @@ -40,7 +40,7 @@ def 
test__to_scrapy_request__without_reconstruction(spider: Spider) -> None: assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key') -def test__to_scrapy_request__without_reconstruction_with_optional_fields(spider: Spider) -> None: +def test_without_reconstruction_with_optional_fields(spider: Spider) -> None: # Without reconstruction of encoded Scrapy request apify_request = CrawleeRequest( url='https://crawlee.dev', @@ -62,7 +62,7 @@ def test__to_scrapy_request__without_reconstruction_with_optional_fields(spider: assert apify_request.user_data == scrapy_request.meta.get('userData') -def test__to_scrapy_request__with_reconstruction(spider: Spider) -> None: +def test_with_reconstruction(spider: Spider) -> None: # With reconstruction of encoded Scrapy request apify_request = CrawleeRequest( url='https://apify.com', @@ -84,7 +84,7 @@ def test__to_scrapy_request__with_reconstruction(spider: Spider) -> None: assert apify_request.user_data == scrapy_request.meta.get('userData') -def test__to_scrapy_request__with_reconstruction_with_optional_fields(spider: Spider) -> None: +def test_with_reconstruction_with_optional_fields(spider: Spider) -> None: # With reconstruction of encoded Scrapy request apify_request = CrawleeRequest( url='https://apify.com', @@ -109,7 +109,7 @@ def test__to_scrapy_request__with_reconstruction_with_optional_fields(spider: Sp assert apify_request.user_data == scrapy_request.meta.get('userData') -def test__to_scrapy_request__invalid_request_for_reconstruction(spider: Spider) -> None: +def test_invalid_request_for_reconstruction(spider: Spider) -> None: apify_request = CrawleeRequest( url='https://example.com', method='GET', diff --git a/tests/unit/scrapy/utils/test_apply_apify_settings.py b/tests/unit/scrapy/utils/test_apply_apify_settings.py index 42de1e9b..64e67a24 100644 --- a/tests/unit/scrapy/utils/test_apply_apify_settings.py +++ b/tests/unit/scrapy/utils/test_apply_apify_settings.py @@ -5,14 +5,14 @@ from 
apify.scrapy.utils import apply_apify_settings -def test__apply_apify_settings__overrides_scheduler() -> None: +def test_overrides_scheduler() -> None: settings = Settings() new_settings = apply_apify_settings(settings=settings) assert new_settings.get('SCHEDULER') == 'apify.scrapy.scheduler.ApifyScheduler' -def test__apply_apify_settings__update_item_pipelines() -> None: +def test_updates_item_pipelines() -> None: settings = Settings( { 'ITEM_PIPELINES': { @@ -28,7 +28,7 @@ def test__apply_apify_settings__update_item_pipelines() -> None: } -def test__apply_apify_settings__update_downloader_middlewares() -> None: +def test_updates_downloader_middlewares() -> None: settings = Settings( { 'DOWNLOADER_MIDDLEWARES': { @@ -49,7 +49,7 @@ def test__apply_apify_settings__update_downloader_middlewares() -> None: } -def test__apply_apify_settings__add_proxy_config() -> None: +def test_adds_proxy_config() -> None: settings = Settings() new_settings = apply_apify_settings(settings=settings) assert new_settings.get('APIFY_PROXY_SETTINGS') is None diff --git a/tests/unit/scrapy/utils/test_get_basic_auth_header.py b/tests/unit/scrapy/utils/test_get_basic_auth_header.py index ef4b833f..a2644803 100644 --- a/tests/unit/scrapy/utils/test_get_basic_auth_header.py +++ b/tests/unit/scrapy/utils/test_get_basic_auth_header.py @@ -8,19 +8,20 @@ @dataclass(frozen=True) -class TestCase: +class ItemTestCase: username: str password: str expected_auth_header: bytes -test_cases = [ - TestCase('username', 'password', b'Basic dXNlcm5hbWU6cGFzc3dvcmQ='), - TestCase('john_smith', 'secret_password_123', b'Basic am9obl9zbWl0aDpzZWNyZXRfcGFzc3dvcmRfMTIz'), -] - - -@pytest.mark.parametrize('tc', test_cases) -def test__get_basic_auth_header(tc: TestCase) -> None: +@pytest.mark.parametrize( + 'tc', + [ + ItemTestCase('username', 'password', b'Basic dXNlcm5hbWU6cGFzc3dvcmQ='), + ItemTestCase('john_smith', 'secret_password_123', b'Basic am9obl9zbWl0aDpzZWNyZXRfcGFzc3dvcmRfMTIz'), + ], + 
ids=['simple_username_password', 'complex_username_password'], +) +def test_basic_auth_header_generation(tc: ItemTestCase) -> None: auth_header = get_basic_auth_header(tc.username, tc.password) assert auth_header == tc.expected_auth_header diff --git a/tests/unit/test_crypto.py b/tests/unit/test_crypto.py index f820a59f..24da3b6b 100644 --- a/tests/unit/test_crypto.py +++ b/tests/unit/test_crypto.py @@ -15,89 +15,93 @@ PUBLIC_KEY = _load_public_key(PUBLIC_KEY_PEM_BASE64) -class TestCrypto: - def test_encrypt_decrypt_varions_string(self: TestCrypto) -> None: - for value in [ - crypto_random_object_id(10), - '👍', - '!', - '@', - '#', - '$', - '%', - '^', - '&', - '*', - '(', - ')', - '-', - '_', - '=', - '+', - '[', - ']', - '{', - '}', - '|', - ';', - ':', - '"', - "'", - ',', - '.', - '<', - '>', - '?', - '/', - '~', - ]: - encrypted = public_encrypt(value, public_key=PUBLIC_KEY) - decrypted_value = private_decrypt(**encrypted, private_key=PRIVATE_KEY) - assert decrypted_value == value - - def test_throw_if_password_is_not_valid(self: TestCrypto) -> None: - test_value = 'test' - encrypted = public_encrypt(test_value, public_key=PUBLIC_KEY) - encrypted['encrypted_password'] = base64.b64encode(b'invalid_password').decode('utf-8') - - with pytest.raises(ValueError, match='Ciphertext length must be equal to key size.'): - private_decrypt(**encrypted, private_key=PRIVATE_KEY) - - def test_throw_error_if_cipher_is_manipulated(self: TestCrypto) -> None: - test_value = 'test2' - encrypted = public_encrypt(test_value, public_key=PUBLIC_KEY) - encrypted['encrypted_value'] = base64.b64encode( - b'invalid_cipher' + base64.b64decode(encrypted['encrypted_value'].encode('utf-8')), - ).decode('utf-8') - - with pytest.raises(ValueError, match='Decryption failed, malformed encrypted value or password.'): - private_decrypt(**encrypted, private_key=PRIVATE_KEY) - - def test_same_encrypted_value_should_return_deffirent_cipher(self: TestCrypto) -> None: - test_value = 'test3' - 
encrypted1 = public_encrypt(test_value, public_key=PUBLIC_KEY) - encrypted2 = public_encrypt(test_value, public_key=PUBLIC_KEY) - assert encrypted1['encrypted_value'] != encrypted2['encrypted_value'] - - # Check if the method is compatible with js version of the same method in: - # https://github.com/apify/apify-shared-js/blob/master/packages/utilities/src/crypto.ts - def test_private_encrypt_node_js_encrypted_value(self: TestCrypto) -> None: - value = 'encrypted_with_node_js' - # This was encrypted with nodejs version of the same method. - encrypted_value_with_node_js = { - 'encrypted_password': 'lw0ez64/T1UcCQMLfhucZ6VIfMcf/TKni7PmXlL/ZRA4nmdGYz7/YQUzGWzKbLChrpqbG21DHxPIubUIQFDFE1ASkLvoSd0Ks8/wjKHMyhp+hsg5aSh9EZK6pBFpp6FeHoinV80+UURTvJuSVbWd1Orw5Frl41taP6RK3uNJlXikmgs8Xc7mShFEENgkz6y9+Pbe7jpcKkaJ2U/h7FN0eNON189kNFYVuAE1n2N6C3Q7dFnjl2e1btqErvg5Vu7ZS4BbX3wgC2qLYySGnqI3BNI5VGhAnncnQcjHb+85qG+LKoPekgY9I0s0kGMxiz/bmy1mYm9O+Lj1mbVUr7BDjQ==', # noqa: E501 - 'encrypted_value': 'k8nkZDCi0hRfBc0RRefxeSHeGV0X60N03VCrhRhENKXBjrF/tEg=', - } - decrypted_value = private_decrypt( - **encrypted_value_with_node_js, - private_key=PRIVATE_KEY, - ) - +def test_encrypt_decrypt_various_strings() -> None: + for value in [ + crypto_random_object_id(10), + '👍', + '!', + '@', + '#', + '$', + '%', + '^', + '&', + '*', + '(', + ')', + '-', + '_', + '=', + '+', + '[', + ']', + '{', + '}', + '|', + ';', + ':', + '"', + "'", + ',', + '.', + '<', + '>', + '?', + '/', + '~', + ]: + encrypted = public_encrypt(value, public_key=PUBLIC_KEY) + decrypted_value = private_decrypt(**encrypted, private_key=PRIVATE_KEY) assert decrypted_value == value - def test_crypto_random_object_id(self: TestCrypto) -> None: - assert len(crypto_random_object_id()) == 17 - assert len(crypto_random_object_id(5)) == 5 - long_random_object_id = crypto_random_object_id(1000) - for char in long_random_object_id: - assert char in 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789' + +def 
test_decryption_fails_with_invalid_password() -> None: + test_value = 'test' + encrypted = public_encrypt(test_value, public_key=PUBLIC_KEY) + encrypted['encrypted_password'] = base64.b64encode(b'invalid_password').decode('utf-8') + + with pytest.raises(ValueError, match='Ciphertext length must be equal to key size.'): + private_decrypt(**encrypted, private_key=PRIVATE_KEY) + + +def test_decryption_fails_with_manipulated_cipher() -> None: + test_value = 'test2' + encrypted = public_encrypt(test_value, public_key=PUBLIC_KEY) + encrypted['encrypted_value'] = base64.b64encode( + b'invalid_cipher' + base64.b64decode(encrypted['encrypted_value'].encode('utf-8')), + ).decode('utf-8') + + with pytest.raises(ValueError, match='Decryption failed, malformed encrypted value or password.'): + private_decrypt(**encrypted, private_key=PRIVATE_KEY) + + +def test_same_value_produces_different_cipher_each_time() -> None: + test_value = 'test3' + encrypted1 = public_encrypt(test_value, public_key=PUBLIC_KEY) + encrypted2 = public_encrypt(test_value, public_key=PUBLIC_KEY) + assert encrypted1['encrypted_value'] != encrypted2['encrypted_value'] + + +# Check if the method is compatible with js version of the same method in: +# https://github.com/apify/apify-shared-js/blob/master/packages/utilities/src/crypto.ts +def test_private_decrypt_with_node_js_encrypted_value() -> None: + value = 'encrypted_with_node_js' + # This was encrypted with nodejs version of the same method. 
+ encrypted_value_with_node_js = { + 'encrypted_password': 'lw0ez64/T1UcCQMLfhucZ6VIfMcf/TKni7PmXlL/ZRA4nmdGYz7/YQUzGWzKbLChrpqbG21DHxPIubUIQFDFE1ASkLvoSd0Ks8/wjKHMyhp+hsg5aSh9EZK6pBFpp6FeHoinV80+UURTvJuSVbWd1Orw5Frl41taP6RK3uNJlXikmgs8Xc7mShFEENgkz6y9+Pbe7jpcKkaJ2U/h7FN0eNON189kNFYVuAE1n2N6C3Q7dFnjl2e1btqErvg5Vu7ZS4BbX3wgC2qLYySGnqI3BNI5VGhAnncnQcjHb+85qG+LKoPekgY9I0s0kGMxiz/bmy1mYm9O+Lj1mbVUr7BDjQ==', # noqa: E501 + 'encrypted_value': 'k8nkZDCi0hRfBc0RRefxeSHeGV0X60N03VCrhRhENKXBjrF/tEg=', + } + decrypted_value = private_decrypt( + **encrypted_value_with_node_js, + private_key=PRIVATE_KEY, + ) + + assert decrypted_value == value + + +def test_crypto_random_object_id_length_and_charset() -> None: + assert len(crypto_random_object_id()) == 17 + assert len(crypto_random_object_id(5)) == 5 + long_random_object_id = crypto_random_object_id(1000) + for char in long_random_object_id: + assert char in 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789' diff --git a/tests/unit/test_event_manager.py b/tests/unit/test_event_manager.py deleted file mode 100644 index 9161709d..00000000 --- a/tests/unit/test_event_manager.py +++ /dev/null @@ -1,203 +0,0 @@ -from __future__ import annotations - -import asyncio -import json -import logging -from collections import defaultdict -from typing import Any, Callable -from unittest.mock import Mock - -import pytest -import websockets -import websockets.server - -from apify_shared.consts import ActorEnvVars -from crawlee.events._types import Event - -from apify import Configuration -from apify._platform_event_manager import EventManager, PlatformEventManager, SystemInfoEventData - - -class TestEventManagerLocal: - async def test_lifecycle_local(self, caplog: pytest.LogCaptureFixture) -> None: - caplog.set_level(logging.DEBUG, logger='apify') - - async with PlatformEventManager(Configuration.get_global_configuration()): - pass - - assert len(caplog.records) == 1 - assert caplog.records[0].levelno == logging.DEBUG - assert ( - 
caplog.records[0].message - == 'APIFY_ACTOR_EVENTS_WS_URL env var not set, no events from Apify platform will be emitted.' - ) - - async def test_event_handling_local(self) -> None: - async with EventManager() as event_manager: - event_calls = defaultdict(list) - - def on_event(event: Event, id: int | None = None) -> Callable: - def event_handler(data: Any) -> None: - nonlocal event_calls - event_calls[event].append((id, data)) - - return event_handler - - handler_system_info = on_event(Event.SYSTEM_INFO) - dummy_system_info = Mock() - dummy_system_info_2 = Mock() - - # Basic test with just one handler on event - # Test adding the handler - event_manager.on(event=Event.SYSTEM_INFO, listener=handler_system_info) - event_manager.emit(event=Event.SYSTEM_INFO, event_data=dummy_system_info) - await asyncio.sleep(0.1) - assert event_calls[Event.SYSTEM_INFO] == [(None, dummy_system_info)] - event_calls[Event.SYSTEM_INFO].clear() - - # Test removing the handler - event_manager.off(event=Event.SYSTEM_INFO, listener=handler_system_info) - event_manager.emit(event=Event.SYSTEM_INFO, event_data=dummy_system_info_2) - await asyncio.sleep(0.1) - assert event_calls[Event.SYSTEM_INFO] == [] - - # Complicated test with multiple handlers - # Add three handlers - handler_persist_state_1 = on_event(Event.PERSIST_STATE, 1) - handler_persist_state_2 = on_event(Event.PERSIST_STATE, 2) - handler_persist_state_3 = on_event(Event.PERSIST_STATE, 3) - event_manager.on(event=Event.PERSIST_STATE, listener=handler_persist_state_1) - event_manager.on(event=Event.PERSIST_STATE, listener=handler_persist_state_2) - event_manager.on(event=Event.PERSIST_STATE, listener=handler_persist_state_3) - - dummy_persist_state = Mock() - - # Test that they all work, and that they're called in order - event_manager.emit(event=Event.PERSIST_STATE, event_data=dummy_persist_state) - await asyncio.sleep(0.1) - assert event_calls[Event.PERSIST_STATE] == [ - (1, dummy_persist_state), - (2, dummy_persist_state), - (3, 
dummy_persist_state), - ] - event_calls[Event.PERSIST_STATE].clear() - - # Test that if you remove one, the others stay - event_manager.off(event=Event.PERSIST_STATE, listener=handler_persist_state_3) - event_manager.emit(event=Event.PERSIST_STATE, event_data=dummy_persist_state) - await asyncio.sleep(0.1) - assert event_calls[Event.PERSIST_STATE] == [ - (1, dummy_persist_state), - (2, dummy_persist_state), - ] - event_calls[Event.PERSIST_STATE].clear() - - # Test that removing all in bulk works - event_manager.off(event=Event.PERSIST_STATE) - event_manager.emit(event=Event.PERSIST_STATE, event_data=dummy_persist_state) - await asyncio.sleep(0.1) - assert event_calls[Event.PERSIST_STATE] == [] - - async def test_event_async_handling_local(self) -> None: - dummy_system_info = Mock() - - async with EventManager() as event_manager: - event_calls = [] - - async def event_handler(data: Any) -> None: - nonlocal event_calls - await asyncio.sleep(2) - event_calls.append(data) - - # Test that async event handlers work, and that they don't block the main thread - event_manager.on(event=Event.SYSTEM_INFO, listener=event_handler) - event_manager.emit(event=Event.SYSTEM_INFO, event_data=dummy_system_info) - await asyncio.sleep(1) - assert event_calls == [] - await asyncio.sleep(2) - assert event_calls == [dummy_system_info] - - -class TestEventManagerOnPlatform: - async def test_lifecycle_on_platform_without_websocket( - self: TestEventManagerOnPlatform, - monkeypatch: pytest.MonkeyPatch, - ) -> None: - monkeypatch.setenv(ActorEnvVars.EVENTS_WEBSOCKET_URL, 'ws://localhost:56565') - event_manager = PlatformEventManager(Configuration.get_global_configuration()) - - with pytest.raises(RuntimeError, match='Error connecting to platform events websocket!'): - async with event_manager: - pass - - async def test_lifecycle_on_platform(self: TestEventManagerOnPlatform, monkeypatch: pytest.MonkeyPatch) -> None: - connected_ws_clients: set[websockets.server.WebSocketServerProtocol] = set() 
- - async def handler(websocket: websockets.server.WebSocketServerProtocol) -> None: - connected_ws_clients.add(websocket) - try: - await websocket.wait_closed() - finally: - connected_ws_clients.remove(websocket) - - async with websockets.server.serve(handler, host='localhost') as ws_server: - # When you don't specify a port explicitly, the websocket connection is opened on a random free port. - # We need to find out which port is that. - port: int = ws_server.sockets[0].getsockname()[1] # type: ignore[index] - monkeypatch.setenv(ActorEnvVars.EVENTS_WEBSOCKET_URL, f'ws://localhost:{port}') - - async with PlatformEventManager(Configuration.get_global_configuration()): - assert len(connected_ws_clients) == 1 - - async def test_event_handling_on_platform( - self: TestEventManagerOnPlatform, - monkeypatch: pytest.MonkeyPatch, - ) -> None: - connected_ws_clients: set[websockets.server.WebSocketServerProtocol] = set() - - async def handler(websocket: websockets.server.WebSocketServerProtocol) -> None: - connected_ws_clients.add(websocket) - try: - await websocket.wait_closed() - finally: - connected_ws_clients.remove(websocket) - - async def send_platform_event(event_name: Event, data: Any = None) -> None: - message: dict[str, Any] = {'name': event_name.value} - if data: - message['data'] = data - - websockets.broadcast(connected_ws_clients, json.dumps(message)) - - async with websockets.server.serve(handler, host='localhost') as ws_server: - # When you don't specify a port explicitly, the websocket connection is opened on a random free port. - # We need to find out which port is that. 
- port: int = ws_server.sockets[0].getsockname()[1] # type: ignore[index] - monkeypatch.setenv(ActorEnvVars.EVENTS_WEBSOCKET_URL, f'ws://localhost:{port}') - - dummy_system_info = { - 'memAvgBytes': 19328860.328293584, - 'memCurrentBytes': 65171456, - 'memMaxBytes': 65171456, - 'cpuAvgUsage': 2.0761105633130397, - 'cpuMaxUsage': 53.941134593993326, - 'cpuCurrentUsage': 8.45549815498155, - 'isCpuOverloaded': False, - 'createdAt': '2024-08-09T16:04:16.161Z', - } - SystemInfoEventData.model_validate(dummy_system_info) - - async with PlatformEventManager(Configuration.get_global_configuration()) as event_manager: - event_calls = [] - - def listener(data: Any) -> None: - event_calls.append(json.loads(data.model_dump_json(by_alias=True)) if data else None) - - event_manager.on(event=Event.SYSTEM_INFO, listener=listener) - - # Test sending event with data - await send_platform_event(Event.SYSTEM_INFO, dummy_system_info) - await asyncio.sleep(0.1) - assert len(event_calls) == 1 - assert event_calls[0]['cpuInfo']['usedRatio'] == 8.45549815498155 - event_calls.clear() diff --git a/tests/unit/test_platform_event_manager.py b/tests/unit/test_platform_event_manager.py new file mode 100644 index 00000000..cb7719f3 --- /dev/null +++ b/tests/unit/test_platform_event_manager.py @@ -0,0 +1,203 @@ +from __future__ import annotations + +import asyncio +import json +import logging +from collections import defaultdict +from typing import Any, Callable +from unittest.mock import Mock + +import pytest +import websockets +import websockets.server + +from apify_shared.consts import ActorEnvVars +from crawlee.events._types import Event + +from apify import Configuration +from apify._platform_event_manager import PlatformEventManager, SystemInfoEventData + + +async def test_lifecycle_local(caplog: pytest.LogCaptureFixture) -> None: + caplog.set_level(logging.DEBUG, logger='apify') + config = Configuration.get_global_configuration() + + async with PlatformEventManager(config): + pass + + 
assert len(caplog.records) == 1 + assert caplog.records[0].levelno == logging.DEBUG + assert ( + caplog.records[0].message + == 'APIFY_ACTOR_EVENTS_WS_URL env var not set, no events from Apify platform will be emitted.' + ) + + +async def test_event_handling_local() -> None: + config = Configuration.get_global_configuration() + + async with PlatformEventManager(config) as event_manager: + event_calls = defaultdict(list) + + def on_event(event: Event, id: int | None = None) -> Callable: + def event_handler(data: Any) -> None: + nonlocal event_calls + event_calls[event].append((id, data)) + + return event_handler + + handler_system_info = on_event(Event.SYSTEM_INFO) + dummy_system_info = Mock() + dummy_system_info_2 = Mock() + + # Basic test with just one handler on event + # Test adding the handler + event_manager.on(event=Event.SYSTEM_INFO, listener=handler_system_info) + event_manager.emit(event=Event.SYSTEM_INFO, event_data=dummy_system_info) + await asyncio.sleep(0.1) + assert event_calls[Event.SYSTEM_INFO] == [(None, dummy_system_info)] + event_calls[Event.SYSTEM_INFO].clear() + + # Test removing the handler + event_manager.off(event=Event.SYSTEM_INFO, listener=handler_system_info) + event_manager.emit(event=Event.SYSTEM_INFO, event_data=dummy_system_info_2) + await asyncio.sleep(0.1) + assert event_calls[Event.SYSTEM_INFO] == [] + + # Complicated test with multiple handlers + # Add three handlers + handler_persist_state_1 = on_event(Event.PERSIST_STATE, 1) + handler_persist_state_2 = on_event(Event.PERSIST_STATE, 2) + handler_persist_state_3 = on_event(Event.PERSIST_STATE, 3) + event_manager.on(event=Event.PERSIST_STATE, listener=handler_persist_state_1) + event_manager.on(event=Event.PERSIST_STATE, listener=handler_persist_state_2) + event_manager.on(event=Event.PERSIST_STATE, listener=handler_persist_state_3) + + dummy_persist_state = Mock() + + # Test that they all work, and that they're called in order + event_manager.emit(event=Event.PERSIST_STATE, 
event_data=dummy_persist_state) + await asyncio.sleep(0.1) + assert event_calls[Event.PERSIST_STATE] == [ + (1, dummy_persist_state), + (2, dummy_persist_state), + (3, dummy_persist_state), + ] + event_calls[Event.PERSIST_STATE].clear() + + # Test that if you remove one, the others stay + event_manager.off(event=Event.PERSIST_STATE, listener=handler_persist_state_3) + event_manager.emit(event=Event.PERSIST_STATE, event_data=dummy_persist_state) + await asyncio.sleep(0.1) + assert event_calls[Event.PERSIST_STATE] == [ + (1, dummy_persist_state), + (2, dummy_persist_state), + ] + event_calls[Event.PERSIST_STATE].clear() + + # Test that removing all in bulk works + event_manager.off(event=Event.PERSIST_STATE) + event_manager.emit(event=Event.PERSIST_STATE, event_data=dummy_persist_state) + await asyncio.sleep(0.1) + assert event_calls[Event.PERSIST_STATE] == [] + + +async def test_event_async_handling_local() -> None: + dummy_system_info = Mock() + config = Configuration.get_global_configuration() + + async with PlatformEventManager(config) as event_manager: + event_calls = [] + + async def event_handler(data: Any) -> None: + nonlocal event_calls + await asyncio.sleep(2) + event_calls.append(data) + + # Test that async event handlers work, and that they don't block the main thread + event_manager.on(event=Event.SYSTEM_INFO, listener=event_handler) + event_manager.emit(event=Event.SYSTEM_INFO, event_data=dummy_system_info) + await asyncio.sleep(1) + assert event_calls == [] + await asyncio.sleep(2) + assert event_calls == [dummy_system_info] + + +async def test_lifecycle_on_platform_without_websocket(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv(ActorEnvVars.EVENTS_WEBSOCKET_URL, 'ws://localhost:56565') + event_manager = PlatformEventManager(Configuration.get_global_configuration()) + + with pytest.raises(RuntimeError, match='Error connecting to platform events websocket!'): + async with event_manager: + pass + + +async def 
test_lifecycle_on_platform(monkeypatch: pytest.MonkeyPatch) -> None: + connected_ws_clients: set[websockets.server.WebSocketServerProtocol] = set() + + async def handler(websocket: websockets.server.WebSocketServerProtocol) -> None: + connected_ws_clients.add(websocket) + try: + await websocket.wait_closed() + finally: + connected_ws_clients.remove(websocket) + + async with websockets.server.serve(handler, host='localhost') as ws_server: + # When you don't specify a port explicitly, the websocket connection is opened on a random free port. + # We need to find out which port is that. + port: int = ws_server.sockets[0].getsockname()[1] # type: ignore[index] + monkeypatch.setenv(ActorEnvVars.EVENTS_WEBSOCKET_URL, f'ws://localhost:{port}') + + async with PlatformEventManager(Configuration.get_global_configuration()): + assert len(connected_ws_clients) == 1 + + +async def test_event_handling_on_platform(monkeypatch: pytest.MonkeyPatch) -> None: + connected_ws_clients: set[websockets.server.WebSocketServerProtocol] = set() + + async def handler(websocket: websockets.server.WebSocketServerProtocol) -> None: + connected_ws_clients.add(websocket) + try: + await websocket.wait_closed() + finally: + connected_ws_clients.remove(websocket) + + async def send_platform_event(event_name: Event, data: Any = None) -> None: + message: dict[str, Any] = {'name': event_name.value} + if data: + message['data'] = data + + websockets.broadcast(connected_ws_clients, json.dumps(message)) + + async with websockets.server.serve(handler, host='localhost') as ws_server: + # When you don't specify a port explicitly, the websocket connection is opened on a random free port. + # We need to find out which port is that. 
+ port: int = ws_server.sockets[0].getsockname()[1] # type: ignore[index] + monkeypatch.setenv(ActorEnvVars.EVENTS_WEBSOCKET_URL, f'ws://localhost:{port}') + + dummy_system_info = { + 'memAvgBytes': 19328860.328293584, + 'memCurrentBytes': 65171456, + 'memMaxBytes': 65171456, + 'cpuAvgUsage': 2.0761105633130397, + 'cpuMaxUsage': 53.941134593993326, + 'cpuCurrentUsage': 8.45549815498155, + 'isCpuOverloaded': False, + 'createdAt': '2024-08-09T16:04:16.161Z', + } + SystemInfoEventData.model_validate(dummy_system_info) + + async with PlatformEventManager(Configuration.get_global_configuration()) as event_manager: + event_calls = [] + + def listener(data: Any) -> None: + event_calls.append(json.loads(data.model_dump_json(by_alias=True)) if data else None) + + event_manager.on(event=Event.SYSTEM_INFO, listener=listener) + + # Test sending event with data + await send_platform_event(Event.SYSTEM_INFO, dummy_system_info) + await asyncio.sleep(0.1) + assert len(event_calls) == 1 + assert event_calls[0]['cpuInfo']['usedRatio'] == 8.45549815498155 + event_calls.clear() diff --git a/tests/unit/test_proxy_configuration.py b/tests/unit/test_proxy_configuration.py index 485d8342..77a0d91f 100644 --- a/tests/unit/test_proxy_configuration.py +++ b/tests/unit/test_proxy_configuration.py @@ -1,4 +1,5 @@ # ruff: noqa: ARG001 ARG005 + from __future__ import annotations import asyncio @@ -19,532 +20,530 @@ from .conftest import ApifyClientAsyncPatcher - DUMMY_PASSWORD = 'DUMMY_PASSWORD' -class TestProxyConfiguration: - def test_constructor_basic(self: TestProxyConfiguration) -> None: - groups = ['GROUP1', 'GROUP2'] - password = 'abcd1234' - country_code = 'US' - proxy_configuration = ProxyConfiguration( - groups=groups, - password=password, - country_code=country_code, +@pytest.fixture +def patched_apify_client(apify_client_async_patcher: ApifyClientAsyncPatcher) -> ApifyClientAsync: + apify_client_async_patcher.patch( + 'user', + 'get', + return_value={ + 'proxy': { + 'password': 
DUMMY_PASSWORD, + }, + }, + ) + return ApifyClientAsync() + + +def test_basic_constructor() -> None: + groups = ['GROUP1', 'GROUP2'] + password = 'abcd1234' + country_code = 'US' + proxy_configuration = ProxyConfiguration( + groups=groups, + password=password, + country_code=country_code, + ) + assert proxy_configuration._groups == groups + assert proxy_configuration._password == password + assert proxy_configuration._country_code == country_code + + +def test_fallback_constructor(monkeypatch: pytest.MonkeyPatch) -> None: + hostname = 'example.com' + password = 'abcd1234' + port = 1234 + + monkeypatch.setenv('APIFY_PROXY_HOSTNAME', hostname) + monkeypatch.setenv('APIFY_PROXY_PASSWORD', password) + monkeypatch.setenv('APIFY_PROXY_PORT', f'{port}') + + proxy_configuration = ProxyConfiguration() + + assert proxy_configuration._hostname == hostname + assert proxy_configuration._password == password + assert proxy_configuration._port == port + + +def test_invalid_arguments() -> None: + for invalid_groups, bad_group_index in [ + (['abc', 'de-f', 'geh'], 1), + (['', 'def', 'geh'], 0), + (['abc', 'DEF', 'geh$'], 2), + ([111, 'DEF', 'geh$'], 2), + ]: + with pytest.raises(ValueError, match=re.escape(str(invalid_groups[bad_group_index]))): # type: ignore + ProxyConfiguration(groups=invalid_groups) # type: ignore + + for invalid_country_code in ['CZE', 'aa', 'DDDD', 1111]: + with pytest.raises(ValueError, match=re.escape(str(invalid_country_code))): + ProxyConfiguration(country_code=invalid_country_code) # type: ignore + + with pytest.raises(ValueError, match='Exactly one of .* must be specified'): + ProxyConfiguration( + proxy_urls=['http://proxy.com:1111'], + new_url_function=lambda session_id=None, request=None: 'http://proxy.com:2222', ) - assert proxy_configuration._groups == groups - assert proxy_configuration._password == password - assert proxy_configuration._country_code == country_code - - def test_constructor_fallback(self: TestProxyConfiguration, monkeypatch: 
pytest.MonkeyPatch) -> None: - hostname = 'example.com' - password = 'abcd1234' - port = 1234 - - monkeypatch.setenv('APIFY_PROXY_HOSTNAME', hostname) - monkeypatch.setenv('APIFY_PROXY_PASSWORD', password) - monkeypatch.setenv('APIFY_PROXY_PORT', f'{port}') - - proxy_configuration = ProxyConfiguration() - - assert proxy_configuration._hostname == hostname - assert proxy_configuration._password == password - assert proxy_configuration._port == port - - def test__fails_with_invalid_arguments(self: TestProxyConfiguration) -> None: - for invalid_groups, bad_group_index in [ - (['abc', 'de-f', 'geh'], 1), - (['', 'def', 'geh'], 0), - (['abc', 'DEF', 'geh$'], 2), - ([111, 'DEF', 'geh$'], 2), - ]: - with pytest.raises(ValueError, match=re.escape(str(invalid_groups[bad_group_index]))): # type: ignore - ProxyConfiguration(groups=invalid_groups) # type: ignore - - for invalid_country_code in ['CZE', 'aa', 'DDDD', 1111]: - with pytest.raises(ValueError, match=re.escape(str(invalid_country_code))): - ProxyConfiguration(country_code=invalid_country_code) # type: ignore - - with pytest.raises(ValueError, match='Exactly one of .* must be specified'): - ProxyConfiguration( - proxy_urls=['http://proxy.com:1111'], - new_url_function=lambda session_id=None, request=None: 'http://proxy.com:2222', - ) - - with pytest.raises(ValueError, match='Cannot combine custom proxies with Apify Proxy'): - ProxyConfiguration(proxy_urls=['http://proxy.com:1111'], groups=['GROUP1']) - - with pytest.raises(ValueError, match=re.escape('bad-url')): - ProxyConfiguration(proxy_urls=['bad-url']) - - with pytest.raises(ValueError, match='Cannot combine custom proxies with Apify Proxy'): - ProxyConfiguration( - new_url_function=lambda session_id=None, request=None: 'http://proxy.com:2222', groups=['GROUP1'] - ) - - -class TestProxyConfigurationNewUrl: - async def test_new_url_basic(self: TestProxyConfigurationNewUrl) -> None: - groups = ['GROUP1', 'GROUP2'] - password = 'abcd1234' - country_code = 'US' - 
proxy_configuration = ProxyConfiguration( - groups=groups, - password=password, - country_code=country_code, + + with pytest.raises(ValueError, match='Cannot combine custom proxies with Apify Proxy'): + ProxyConfiguration(proxy_urls=['http://proxy.com:1111'], groups=['GROUP1']) + + with pytest.raises(ValueError, match=re.escape('bad-url')): + ProxyConfiguration(proxy_urls=['bad-url']) + + with pytest.raises(ValueError, match='Cannot combine custom proxies with Apify Proxy'): + ProxyConfiguration( + new_url_function=lambda session_id=None, request=None: 'http://proxy.com:2222', groups=['GROUP1'] ) - proxy_url = await proxy_configuration.new_url() - expected_username = f'groups-{"+".join(groups)},country-{country_code}' + +async def test_new_url_basic() -> None: + groups = ['GROUP1', 'GROUP2'] + password = 'abcd1234' + country_code = 'US' + proxy_configuration = ProxyConfiguration( + groups=groups, + password=password, + country_code=country_code, + ) + proxy_url = await proxy_configuration.new_url() + + expected_username = f'groups-{"+".join(groups)},country-{country_code}' + expected_hostname = 'proxy.apify.com' + expected_port = 8000 + + assert proxy_url == f'http://{expected_username}:{password}@{expected_hostname}:{expected_port}' + + +async def test_new_url_with_session_ids() -> None: + groups = ['GROUP1', 'GROUP2'] + password = 'abcd1234' + country_code = 'US' + proxy_configuration = ProxyConfiguration( + groups=groups, + password=password, + country_code=country_code, + ) + + session_ids: list[str] = [ + 'a', + 'a_b', + 'a_2', + 'a_1_b', + 'aaa~BBB', + '1', + '0.34252352', + '123456', + 'XXXXXXXXXXxxxxxxxxxxXXXXXXXXXXxxxxxxxxxxXXXXXXXXXX', + ] + for session_id in session_ids: + expected_username = f'groups-{"+".join(groups)},session-{session_id},country-{country_code}' expected_hostname = 'proxy.apify.com' expected_port = 8000 + proxy_url = await proxy_configuration.new_url(session_id) + assert proxy_url == 
f'http://{expected_username}:{password}@{expected_hostname}:{expected_port}' - async def test_new_url_session_id(self: TestProxyConfigurationNewUrl) -> None: - groups = ['GROUP1', 'GROUP2'] - password = 'abcd1234' - country_code = 'US' - proxy_configuration = ProxyConfiguration( - groups=groups, - password=password, - country_code=country_code, - ) + for invalid_session_id in ['a-b', 'a$b', 'XXXXXXXXXXxxxxxxxxxxXXXXXXXXXXxxxxxxxxxxXXXXXXXXXXTooLong']: + with pytest.raises(ValueError, match=re.escape(str(invalid_session_id))): + await proxy_configuration.new_url(invalid_session_id) - session_ids: list[str] = [ - 'a', - 'a_b', - 'a_2', - 'a_1_b', - 'aaa~BBB', - '1', - '0.34252352', - '123456', - 'XXXXXXXXXXxxxxxxxxxxXXXXXXXXXXxxxxxxxxxxXXXXXXXXXX', - ] - for session_id in session_ids: - expected_username = f'groups-{"+".join(groups)},session-{session_id},country-{country_code}' - expected_hostname = 'proxy.apify.com' - expected_port = 8000 - - proxy_url = await proxy_configuration.new_url(session_id) - - assert proxy_url == f'http://{expected_username}:{password}@{expected_hostname}:{expected_port}' - - for invalid_session_id in ['a-b', 'a$b', 'XXXXXXXXXXxxxxxxxxxxXXXXXXXXXXxxxxxxxxxxXXXXXXXXXXTooLong']: - with pytest.raises(ValueError, match=re.escape(str(invalid_session_id))): - await proxy_configuration.new_url(invalid_session_id) - - async def test_rotating_custom_urls(self: TestProxyConfigurationNewUrl) -> None: - proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] - proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls) - - assert await proxy_configuration.new_url() == proxy_urls[0] - assert await proxy_configuration.new_url() == proxy_urls[1] - assert await proxy_configuration.new_url() == proxy_urls[2] - assert await proxy_configuration.new_url() == proxy_urls[0] - assert await proxy_configuration.new_url() == proxy_urls[1] - assert await proxy_configuration.new_url() == proxy_urls[2] - - async def 
test_rotating_custom_urls_with_sessions(self: TestProxyConfigurationNewUrl) -> None: - sessions = ['sesssion_01', 'sesssion_02', 'sesssion_03', 'sesssion_04', 'sesssion_05', 'sesssion_06'] - proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] - - proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls) - - # same session should use same proxy URL - assert await proxy_configuration.new_url(sessions[0]) == proxy_urls[0] - assert await proxy_configuration.new_url(sessions[0]) == proxy_urls[0] - assert await proxy_configuration.new_url(sessions[0]) == proxy_urls[0] - - # different sessions should rotate different proxies - assert await proxy_configuration.new_url(sessions[1]) == proxy_urls[1] - assert await proxy_configuration.new_url(sessions[2]) == proxy_urls[2] - assert await proxy_configuration.new_url(sessions[3]) == proxy_urls[0] - assert await proxy_configuration.new_url(sessions[4]) == proxy_urls[1] - assert await proxy_configuration.new_url(sessions[5]) == proxy_urls[2] - - # already used sessions should be remembered - assert await proxy_configuration.new_url(sessions[1]) == proxy_urls[1] - assert await proxy_configuration.new_url(sessions[3]) == proxy_urls[0] - - async def test_custom_new_url_function(self: TestProxyConfigurationNewUrl) -> None: - custom_urls = [ - 'http://proxy.com:1111', - 'http://proxy.com:2222', - 'http://proxy.com:3333', - 'http://proxy.com:4444', - 'http://proxy.com:5555', - 'http://proxy.com:6666', - ] - - def custom_new_url_function(session_id: str | None = None, request: Any = None) -> str: - nonlocal custom_urls - return custom_urls.pop() - - proxy_configuration = ProxyConfiguration(new_url_function=custom_new_url_function) - - for custom_url in reversed(custom_urls): - assert await proxy_configuration.new_url() == custom_url - - async def test_custom_new_url_function_async(self: TestProxyConfigurationNewUrl) -> None: - custom_urls = [ - 'http://proxy.com:1111', - 'http://proxy.com:2222', 
- 'http://proxy.com:3333', - 'http://proxy.com:4444', - 'http://proxy.com:5555', - 'http://proxy.com:6666', - ] - - async def custom_new_url_function(session_id: str | None = None, request: Any = None) -> str: - nonlocal custom_urls - await asyncio.sleep(0.1) - return custom_urls.pop() - - proxy_configuration = ProxyConfiguration(new_url_function=custom_new_url_function) - - for custom_url in reversed(custom_urls): - assert await proxy_configuration.new_url() == custom_url - - async def test_invalid_custom_new_url_function(self: TestProxyConfigurationNewUrl) -> None: - def custom_new_url_function(session_id: str | None = None, request: Any = None) -> str: - raise ValueError - - proxy_configuration = ProxyConfiguration(new_url_function=custom_new_url_function) - - with pytest.raises(ValueError, match='The provided "new_url_function" did not return a valid URL'): - await proxy_configuration.new_url() - - async def test_proxy_configuration_not_sharing_references(self: TestProxyConfigurationNewUrl) -> None: - urls = [ - 'http://proxy-example-1.com:8000', - 'http://proxy-example-2.com:8000', - ] - proxy_configuration_1 = ProxyConfiguration( - proxy_urls=urls, - ) - urls.append('http://proxy-example-3.com:8000') - proxy_configuration_2 = ProxyConfiguration( - proxy_urls=urls, - ) +async def test_rotating_custom_urls() -> None: + proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] + proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls) - assert proxy_configuration_1 is not None - assert proxy_configuration_2 is not None + assert await proxy_configuration.new_url() == proxy_urls[0] + assert await proxy_configuration.new_url() == proxy_urls[1] + assert await proxy_configuration.new_url() == proxy_urls[2] + assert await proxy_configuration.new_url() == proxy_urls[0] + assert await proxy_configuration.new_url() == proxy_urls[1] + assert await proxy_configuration.new_url() == proxy_urls[2] - assert 
proxy_configuration_1._proxy_urls is not proxy_configuration_2._proxy_urls - session_id = 'ABCD' - await proxy_configuration_1.new_url(session_id=session_id) - await proxy_configuration_2.new_url(session_id=session_id) +async def test_rotating_custom_urls_with_sessions() -> None: + sessions = ['sesssion_01', 'sesssion_02', 'sesssion_03', 'sesssion_04', 'sesssion_05', 'sesssion_06'] + proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] - assert proxy_configuration_1._used_proxy_urls is not proxy_configuration_2._used_proxy_urls + proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls) + # same session should use same proxy URL + assert await proxy_configuration.new_url(sessions[0]) == proxy_urls[0] + assert await proxy_configuration.new_url(sessions[0]) == proxy_urls[0] + assert await proxy_configuration.new_url(sessions[0]) == proxy_urls[0] -class TestProxyConfigurationNewProxyInfo: - async def test_new_proxy_info_basic(self: TestProxyConfigurationNewProxyInfo) -> None: - groups = ['GROUP1', 'GROUP2'] - password = 'abcd1234' - country_code = 'US' - proxy_configuration = ProxyConfiguration( - groups=groups, - password=password, - country_code=country_code, - ) + # different sessions should rotate different proxies + assert await proxy_configuration.new_url(sessions[1]) == proxy_urls[1] + assert await proxy_configuration.new_url(sessions[2]) == proxy_urls[2] + assert await proxy_configuration.new_url(sessions[3]) == proxy_urls[0] + assert await proxy_configuration.new_url(sessions[4]) == proxy_urls[1] + assert await proxy_configuration.new_url(sessions[5]) == proxy_urls[2] - proxy_info = await proxy_configuration.new_proxy_info() - assert proxy_info is not None + # already used sessions should be remembered + assert await proxy_configuration.new_url(sessions[1]) == proxy_urls[1] + assert await proxy_configuration.new_url(sessions[3]) == proxy_urls[0] - expected_hostname = 'proxy.apify.com' - expected_port = 8000 - 
expected_username = f'groups-{"+".join(groups)},country-{country_code}' - - assert asdict(proxy_info) == { - 'url': f'http://{expected_username}:{password}@{expected_hostname}:{expected_port}', - 'hostname': expected_hostname, - 'port': expected_port, - 'groups': groups, - 'country_code': country_code, - 'username': expected_username, - 'password': password, - 'proxy_tier': None, - 'session_id': None, - 'scheme': 'http', - } - - async def test_new_proxy_info_rotates_urls(self: TestProxyConfigurationNewProxyInfo) -> None: - proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] - proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls) - - proxy_info = await proxy_configuration.new_proxy_info() - assert proxy_info is not None - assert proxy_info.url == proxy_urls[0] - - proxy_info = await proxy_configuration.new_proxy_info() - assert proxy_info is not None - assert proxy_info.url == proxy_urls[1] - - proxy_info = await proxy_configuration.new_proxy_info() - assert proxy_info is not None - assert proxy_info.url == proxy_urls[2] - - proxy_info = await proxy_configuration.new_proxy_info() - assert proxy_info is not None - assert proxy_info.url == proxy_urls[0] - - proxy_info = await proxy_configuration.new_proxy_info() - assert proxy_info is not None - assert proxy_info.url == proxy_urls[1] - - proxy_info = await proxy_configuration.new_proxy_info() - assert proxy_info is not None - assert proxy_info.url == proxy_urls[2] - - async def test_new_proxy_info_rotates_urls_with_sessions(self: TestProxyConfigurationNewProxyInfo) -> None: - sessions = ['sesssion_01', 'sesssion_02', 'sesssion_03', 'sesssion_04', 'sesssion_05', 'sesssion_06'] - proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] - - proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls) - - # same session should use same proxy URL - proxy_info = await proxy_configuration.new_proxy_info(sessions[0]) - assert proxy_info is not 
None - assert proxy_info.url == proxy_urls[0] - - proxy_info = await proxy_configuration.new_proxy_info(sessions[0]) - assert proxy_info is not None - assert proxy_info.url == proxy_urls[0] - - proxy_info = await proxy_configuration.new_proxy_info(sessions[0]) - assert proxy_info is not None - assert proxy_info.url == proxy_urls[0] - - # different sessions should rotate different proxies - proxy_info = await proxy_configuration.new_proxy_info(sessions[1]) - assert proxy_info is not None - assert proxy_info.url == proxy_urls[1] - - proxy_info = await proxy_configuration.new_proxy_info(sessions[2]) - assert proxy_info is not None - assert proxy_info.url == proxy_urls[2] - - proxy_info = await proxy_configuration.new_proxy_info(sessions[3]) - assert proxy_info is not None - assert proxy_info.url == proxy_urls[0] - - proxy_info = await proxy_configuration.new_proxy_info(sessions[4]) - assert proxy_info is not None - assert proxy_info.url == proxy_urls[1] - - proxy_info = await proxy_configuration.new_proxy_info(sessions[5]) - assert proxy_info is not None - assert proxy_info.url == proxy_urls[2] - - # already used sessions should be remembered - proxy_info = await proxy_configuration.new_proxy_info(sessions[1]) - assert proxy_info is not None - assert proxy_info.url == proxy_urls[1] - - proxy_info = await proxy_configuration.new_proxy_info(sessions[3]) - assert proxy_info is not None - assert proxy_info.url == proxy_urls[0] +async def test_with_custom_new_url_function() -> None: + custom_urls = [ + 'http://proxy.com:1111', + 'http://proxy.com:2222', + 'http://proxy.com:3333', + 'http://proxy.com:4444', + 'http://proxy.com:5555', + 'http://proxy.com:6666', + ] -@pytest.fixture -def patched_apify_client(apify_client_async_patcher: ApifyClientAsyncPatcher) -> ApifyClientAsync: - apify_client_async_patcher.patch( - 'user', - 'get', - return_value={ - 'proxy': { - 'password': DUMMY_PASSWORD, - }, - }, + def custom_new_url_function(session_id: str | None = None, request: Any 
= None) -> str: + nonlocal custom_urls + return custom_urls.pop() + + proxy_configuration = ProxyConfiguration(new_url_function=custom_new_url_function) + + for custom_url in reversed(custom_urls): + assert await proxy_configuration.new_url() == custom_url + + +async def test_with_async_custom_new_url_function() -> None: + custom_urls = [ + 'http://proxy.com:1111', + 'http://proxy.com:2222', + 'http://proxy.com:3333', + 'http://proxy.com:4444', + 'http://proxy.com:5555', + 'http://proxy.com:6666', + ] + + async def custom_new_url_function(session_id: str | None = None, request: Any = None) -> str: + nonlocal custom_urls + await asyncio.sleep(0.1) + return custom_urls.pop() + + proxy_configuration = ProxyConfiguration(new_url_function=custom_new_url_function) + + for custom_url in reversed(custom_urls): + assert await proxy_configuration.new_url() == custom_url + + +async def test_invalid_custom_new_url_function() -> None: + def custom_new_url_function(session_id: str | None = None, request: Any = None) -> str: + raise ValueError + + proxy_configuration = ProxyConfiguration(new_url_function=custom_new_url_function) + + with pytest.raises(ValueError, match='The provided "new_url_function" did not return a valid URL'): + await proxy_configuration.new_url() + + +async def test_url_reference_not_shared_between_instances() -> None: + urls = [ + 'http://proxy-example-1.com:8000', + 'http://proxy-example-2.com:8000', + ] + proxy_configuration_1 = ProxyConfiguration( + proxy_urls=urls, ) - return ApifyClientAsync() + urls.append('http://proxy-example-3.com:8000') + proxy_configuration_2 = ProxyConfiguration( + proxy_urls=urls, + ) + + assert proxy_configuration_1 is not None + assert proxy_configuration_2 is not None + + assert proxy_configuration_1._proxy_urls is not proxy_configuration_2._proxy_urls + + session_id = 'ABCD' + await proxy_configuration_1.new_url(session_id=session_id) + await proxy_configuration_2.new_url(session_id=session_id) + assert 
proxy_configuration_1._used_proxy_urls is not proxy_configuration_2._used_proxy_urls -class TestProxyConfigurationInitialize: - async def test_initialize_basic( - self: TestProxyConfigurationInitialize, - monkeypatch: pytest.MonkeyPatch, - respx_mock: MockRouter, - patched_apify_client: ApifyClientAsync, - ) -> None: - dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' - monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN') - monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) - - route = respx_mock.get(dummy_proxy_status_url) - route.mock( - httpx.Response( - 200, - json={ - 'connected': True, - 'connectionError': None, - 'isManInTheMiddle': True, - }, - ) + +async def test_new_proxy_info_basic_construction() -> None: + groups = ['GROUP1', 'GROUP2'] + password = 'abcd1234' + country_code = 'US' + proxy_configuration = ProxyConfiguration( + groups=groups, + password=password, + country_code=country_code, + ) + + proxy_info = await proxy_configuration.new_proxy_info() + assert proxy_info is not None + + expected_hostname = 'proxy.apify.com' + expected_port = 8000 + expected_username = f'groups-{"+".join(groups)},country-{country_code}' + + assert asdict(proxy_info) == { + 'url': f'http://{expected_username}:{password}@{expected_hostname}:{expected_port}', + 'hostname': expected_hostname, + 'port': expected_port, + 'groups': groups, + 'country_code': country_code, + 'username': expected_username, + 'password': password, + 'proxy_tier': None, + 'session_id': None, + 'scheme': 'http', + } + + +async def test_new_proxy_info_rotating_urls() -> None: + proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] + proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls) + + proxy_info = await proxy_configuration.new_proxy_info() + assert proxy_info is not None + assert proxy_info.url == proxy_urls[0] + + proxy_info = await proxy_configuration.new_proxy_info() + assert proxy_info is not None + 
assert proxy_info.url == proxy_urls[1] + + proxy_info = await proxy_configuration.new_proxy_info() + assert proxy_info is not None + assert proxy_info.url == proxy_urls[2] + + proxy_info = await proxy_configuration.new_proxy_info() + assert proxy_info is not None + assert proxy_info.url == proxy_urls[0] + + proxy_info = await proxy_configuration.new_proxy_info() + assert proxy_info is not None + assert proxy_info.url == proxy_urls[1] + + proxy_info = await proxy_configuration.new_proxy_info() + assert proxy_info is not None + assert proxy_info.url == proxy_urls[2] + + +async def test_new_proxy_info_rotating_urls_with_sessions() -> None: + sessions = ['sesssion_01', 'sesssion_02', 'sesssion_03', 'sesssion_04', 'sesssion_05', 'sesssion_06'] + proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] + + proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls) + + # same session should use same proxy URL + proxy_info = await proxy_configuration.new_proxy_info(sessions[0]) + assert proxy_info is not None + assert proxy_info.url == proxy_urls[0] + + proxy_info = await proxy_configuration.new_proxy_info(sessions[0]) + assert proxy_info is not None + assert proxy_info.url == proxy_urls[0] + + proxy_info = await proxy_configuration.new_proxy_info(sessions[0]) + assert proxy_info is not None + assert proxy_info.url == proxy_urls[0] + + # different sessions should rotate different proxies + proxy_info = await proxy_configuration.new_proxy_info(sessions[1]) + assert proxy_info is not None + assert proxy_info.url == proxy_urls[1] + + proxy_info = await proxy_configuration.new_proxy_info(sessions[2]) + assert proxy_info is not None + assert proxy_info.url == proxy_urls[2] + + proxy_info = await proxy_configuration.new_proxy_info(sessions[3]) + assert proxy_info is not None + assert proxy_info.url == proxy_urls[0] + + proxy_info = await proxy_configuration.new_proxy_info(sessions[4]) + assert proxy_info is not None + assert proxy_info.url == 
proxy_urls[1] + + proxy_info = await proxy_configuration.new_proxy_info(sessions[5]) + assert proxy_info is not None + assert proxy_info.url == proxy_urls[2] + + # already used sessions should be remembered + proxy_info = await proxy_configuration.new_proxy_info(sessions[1]) + assert proxy_info is not None + assert proxy_info.url == proxy_urls[1] + + proxy_info = await proxy_configuration.new_proxy_info(sessions[3]) + assert proxy_info is not None + assert proxy_info.url == proxy_urls[0] + + +async def test_initialize_with_valid_configuration( + monkeypatch: pytest.MonkeyPatch, + respx_mock: MockRouter, + patched_apify_client: ApifyClientAsync, +) -> None: + dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' + monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN') + monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) + + route = respx_mock.get(dummy_proxy_status_url) + route.mock( + httpx.Response( + 200, + json={ + 'connected': True, + 'connectionError': None, + 'isManInTheMiddle': True, + }, ) + ) - proxy_configuration = ProxyConfiguration(_apify_client=patched_apify_client) + proxy_configuration = ProxyConfiguration(_apify_client=patched_apify_client) + await proxy_configuration.initialize() + + assert proxy_configuration._password == DUMMY_PASSWORD + assert proxy_configuration.is_man_in_the_middle is True + + assert len(patched_apify_client.calls['user']['get']) == 1 # type: ignore + assert len(route.calls) == 1 + + +async def test_initialize_without_password_or_token() -> None: + proxy_configuration = ProxyConfiguration() + + with pytest.raises(ValueError, match='Apify Proxy password must be provided'): await proxy_configuration.initialize() - assert proxy_configuration._password == DUMMY_PASSWORD - assert proxy_configuration.is_man_in_the_middle is True - - assert len(patched_apify_client.calls['user']['get']) == 1 # type: ignore - assert len(route.calls) == 1 - - async def test_initialize_no_password_no_token(self: 
TestProxyConfigurationInitialize) -> None: - proxy_configuration = ProxyConfiguration() - - with pytest.raises(ValueError, match='Apify Proxy password must be provided'): - await proxy_configuration.initialize() - - async def test_initialize_manual_password( - self: TestProxyConfigurationInitialize, - monkeypatch: pytest.MonkeyPatch, - respx_mock: MockRouter, - ) -> None: - dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' - monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) - - respx_mock.get(dummy_proxy_status_url).mock( - httpx.Response( - 200, - json={ - 'connected': True, - 'connectionError': None, - 'isManInTheMiddle': False, - }, - ) + +async def test_initialize_with_manual_password(monkeypatch: pytest.MonkeyPatch, respx_mock: MockRouter) -> None: + dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' + monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) + + respx_mock.get(dummy_proxy_status_url).mock( + httpx.Response( + 200, + json={ + 'connected': True, + 'connectionError': None, + 'isManInTheMiddle': False, + }, ) + ) - proxy_configuration = ProxyConfiguration(password=DUMMY_PASSWORD) + proxy_configuration = ProxyConfiguration(password=DUMMY_PASSWORD) + + await proxy_configuration.initialize() + + assert proxy_configuration._password == DUMMY_PASSWORD + assert proxy_configuration.is_man_in_the_middle is False + + +async def test_initialize_with_manual_password_different_than_user_one( + monkeypatch: pytest.MonkeyPatch, + caplog: pytest.LogCaptureFixture, + respx_mock: MockRouter, + patched_apify_client: ApifyClientAsync, +) -> None: + dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' + different_dummy_password = 'DIFFERENT_DUMMY_PASSWORD' + monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN') + monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) + monkeypatch.setenv(ApifyEnvVars.PROXY_PASSWORD.value, different_dummy_password) + + 
respx_mock.get(dummy_proxy_status_url).mock( + httpx.Response( + 200, + json={ + 'connected': True, + 'connectionError': None, + 'isManInTheMiddle': True, + }, + ) + ) - await proxy_configuration.initialize() + proxy_configuration = ProxyConfiguration(_apify_client=patched_apify_client) - assert proxy_configuration._password == DUMMY_PASSWORD - assert proxy_configuration.is_man_in_the_middle is False - - async def test_initialize_manual_password_different_than_user_one( - self: TestProxyConfigurationInitialize, - monkeypatch: pytest.MonkeyPatch, - caplog: pytest.LogCaptureFixture, - respx_mock: MockRouter, - patched_apify_client: ApifyClientAsync, - ) -> None: - dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' - different_dummy_password = 'DIFFERENT_DUMMY_PASSWORD' - monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN') - monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) - monkeypatch.setenv(ApifyEnvVars.PROXY_PASSWORD.value, different_dummy_password) - - respx_mock.get(dummy_proxy_status_url).mock( - httpx.Response( - 200, - json={ - 'connected': True, - 'connectionError': None, - 'isManInTheMiddle': True, - }, - ) + await proxy_configuration.initialize() + + assert proxy_configuration._password == different_dummy_password + assert proxy_configuration.is_man_in_the_middle is True + + assert len(caplog.records) == 1 + assert caplog.records[0].levelname == 'WARNING' + assert 'The Apify Proxy password you provided belongs to a different user' in caplog.records[0].message + + +async def test_initialize_when_not_connected(monkeypatch: pytest.MonkeyPatch, respx_mock: MockRouter) -> None: + dummy_connection_error = 'DUMMY_CONNECTION_ERROR' + dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' + monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) + + respx_mock.get(dummy_proxy_status_url).mock( + httpx.Response( + 200, + json={ + 'connected': False, + 'connectionError': 
dummy_connection_error, + }, ) + ) - proxy_configuration = ProxyConfiguration(_apify_client=patched_apify_client) + proxy_configuration = ProxyConfiguration(password=DUMMY_PASSWORD) + with pytest.raises(ConnectionError, match=dummy_connection_error): await proxy_configuration.initialize() - assert proxy_configuration._password == different_dummy_password - assert proxy_configuration.is_man_in_the_middle is True - - assert len(caplog.records) == 1 - assert caplog.records[0].levelname == 'WARNING' - assert 'The Apify Proxy password you provided belongs to a different user' in caplog.records[0].message - - async def test_initialize_not_connected( - self: TestProxyConfigurationInitialize, - monkeypatch: pytest.MonkeyPatch, - respx_mock: MockRouter, - ) -> None: - dummy_connection_error = 'DUMMY_CONNECTION_ERROR' - dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' - monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) - - respx_mock.get(dummy_proxy_status_url).mock( - httpx.Response( - 200, - json={ - 'connected': False, - 'connectionError': dummy_connection_error, - }, - ) - ) - proxy_configuration = ProxyConfiguration(password=DUMMY_PASSWORD) +async def test_initialize_when_status_page_unavailable( + monkeypatch: pytest.MonkeyPatch, + caplog: pytest.LogCaptureFixture, + respx_mock: MockRouter, +) -> None: + dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' + monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) - with pytest.raises(ConnectionError, match=dummy_connection_error): - await proxy_configuration.initialize() + respx_mock.get(dummy_proxy_status_url).mock(httpx.Response(500)) - async def test_initialize_status_page_unavailable( - self: TestProxyConfigurationInitialize, - monkeypatch: pytest.MonkeyPatch, - caplog: pytest.LogCaptureFixture, - respx_mock: MockRouter, - ) -> None: - dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' - 
monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) + proxy_configuration = ProxyConfiguration(password=DUMMY_PASSWORD) - respx_mock.get(dummy_proxy_status_url).mock(httpx.Response(500)) + await proxy_configuration.initialize() - proxy_configuration = ProxyConfiguration(password=DUMMY_PASSWORD) + assert len(caplog.records) == 1 + assert caplog.records[0].levelname == 'WARNING' + assert 'Apify Proxy access check timed out' in caplog.records[0].message - await proxy_configuration.initialize() - assert len(caplog.records) == 1 - assert caplog.records[0].levelname == 'WARNING' - assert 'Apify Proxy access check timed out' in caplog.records[0].message +async def test_initialize_with_non_apify_proxy( + monkeypatch: pytest.MonkeyPatch, + respx_mock: MockRouter, + patched_apify_client: ApifyClientAsync, +) -> None: + dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' + monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) - async def test_initialize_not_called_non_apify_proxy( - self: TestProxyConfigurationInitialize, - monkeypatch: pytest.MonkeyPatch, - respx_mock: MockRouter, - patched_apify_client: ApifyClientAsync, - ) -> None: - dummy_proxy_status_url = 'http://dummy-proxy-status-url.com' - monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url) + route = respx_mock.get(dummy_proxy_status_url) + route.mock(httpx.Response(200)) - route = respx_mock.get(dummy_proxy_status_url) - route.mock(httpx.Response(200)) + proxy_configuration = ProxyConfiguration(proxy_urls=['http://dummy-proxy.com:8000']) - proxy_configuration = ProxyConfiguration(proxy_urls=['http://dummy-proxy.com:8000']) + await proxy_configuration.initialize() - await proxy_configuration.initialize() + assert len(patched_apify_client.calls['user']['get']) == 0 # type: ignore + assert len(route.calls) == 0 + + +def test_is_url_validation() -> None: + assert is_url('http://dummy-proxy.com:8000') is True + assert 
is_url('https://example.com') is True + assert is_url('http://localhost') is True + assert is_url('https://12.34.56.78') is True + assert is_url('http://12.34.56.78:9012') is True + assert is_url('http://::1') is True + assert is_url('https://2f45:4da6:8f56:af8c:5dce:c1de:14d2:8661') is True - assert len(patched_apify_client.calls['user']['get']) == 0 # type: ignore - assert len(route.calls) == 0 - - -class TestIsUrl: - def test__is_url(self: TestIsUrl) -> None: - assert is_url('http://dummy-proxy.com:8000') is True - assert is_url('https://example.com') is True - assert is_url('http://localhost') is True - assert is_url('https://12.34.56.78') is True - assert is_url('http://12.34.56.78:9012') is True - assert is_url('http://::1') is True - assert is_url('https://2f45:4da6:8f56:af8c:5dce:c1de:14d2:8661') is True - - assert is_url('dummy-proxy.com:8000') is False - assert is_url('gyfwgfhkjhljkfhdsf') is False - assert is_url('http://') is False - assert is_url('http://example') is False - assert is_url('http:/example.com') is False - assert is_url('12.34.56.78') is False - assert is_url('::1') is False - assert is_url('https://4da6:8f56:af8c:5dce:c1de:14d2:8661') is False + assert is_url('dummy-proxy.com:8000') is False + assert is_url('gyfwgfhkjhljkfhdsf') is False + assert is_url('http://') is False + assert is_url('http://example') is False + assert is_url('http:/example.com') is False + assert is_url('12.34.56.78') is False + assert is_url('::1') is False + assert is_url('https://4da6:8f56:af8c:5dce:c1de:14d2:8661') is False