Commit fd0566e

feat: Add integration tests for storages, proxy configuration (#49)
1 parent a2e4ff6 commit fd0566e

11 files changed: +349 -24 lines changed

setup.py

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@
     package_data={'apify': ['py.typed']},
     python_requires='>=3.8',
     install_requires=[
-        'apify-client ~= 0.7.0b39',
+        'apify-client ~= 0.7.0b46',
         'httpx ~= 0.23.0',
         'psutil ~= 5.9.4',
         'pydantic ~= 1.10.2',

src/apify/_crypto.py

Lines changed: 2 additions & 2 deletions
@@ -21,8 +21,8 @@ def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict:
     It returns the encrypted password and encrypted value in BASE64 format.

     Args:
-        value (str): Password used to encrypt the private key encoded as base64 string.
-        public_key (RSAPublicKey): Private key to use for decryption.
+        value (str): The value which should be encrypted.
+        public_key (RSAPublicKey): Public key to use for encryption.

     Returns:
         disc: Encrypted password and value.
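
As an aside, a minimal usage sketch of the corrected docstring's contract: the value is encrypted with the public key. The throwaway RSA key, the sample plaintext, and importing the private apify._crypto helper directly are assumptions for illustration, not part of this commit:

from cryptography.hazmat.primitives.asymmetric import rsa

from apify._crypto import public_encrypt  # private helper, imported here only for illustration

# Assumed setup: a throwaway RSA key pair generated just for this sketch.
private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)

# Per the corrected docstring: `value` is the plaintext to encrypt,
# `public_key` is the RSA public key used for the encryption.
encrypted = public_encrypt('some secret value', public_key=private_key.public_key())
print(encrypted)  # a dict holding the BASE64-encoded encrypted password and encrypted value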

src/apify/storages/request_queue.py

Lines changed: 18 additions & 16 deletions
@@ -2,7 +2,7 @@
 import json
 import logging
 from collections import OrderedDict
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Coroutine, Dict, Optional
 from typing import OrderedDict as OrderedDictType
 from typing import Set, Union
@@ -110,7 +110,7 @@ def __init__(self, id: str, name: Optional[str], client: Union[ApifyClientAsync,
         self._queue_head_dict = OrderedDict()
         self._query_queue_head_promise = None
         self._in_progress = set()
-        self._last_activity = datetime.utcnow()
+        self._last_activity = datetime.now(timezone.utc)
         self._recently_handled = LRUCache[bool](max_length=RECENTLY_HANDLED_CACHE_SIZE)
         self._requests_cache = LRUCache(max_length=MAX_CACHED_REQUESTS)

@@ -140,7 +140,7 @@ async def add_request(self, request: Dict, *, forefront: bool = False) -> Dict:
         _budget_ow(request, {
             'url': (str, True),
         })
-        self._last_activity = datetime.utcnow()
+        self._last_activity = datetime.now(timezone.utc)

         if request.get('uniqueKey') is None:
             request['uniqueKey'] = request['url']  # TODO: Check Request class in crawlee and replicate uniqueKey generation logic...
@@ -215,7 +215,7 @@ async def fetch_next_request(self) -> Optional[Dict]:
             })}""")
             return None
         self._in_progress.add(next_request_id)
-        self._last_activity = datetime.utcnow()
+        self._last_activity = datetime.now(timezone.utc)

         try:
             request = await self.get_request(next_request_id)
@@ -266,12 +266,12 @@ async def mark_request_as_handled(self, request: Dict) -> Optional[Dict]:
             'uniqueKey': (str, True),
             'handledAt': (datetime, False),
         })
-        self._last_activity = datetime.utcnow()
+        self._last_activity = datetime.now(timezone.utc)
         if request['id'] not in self._in_progress:
             logging.debug(f'Cannot mark request {request["id"]} as handled, because it is not in progress!')
             return None

-        request['handledAt'] = request.get('handledAt', datetime.utcnow())
+        request['handledAt'] = request.get('handledAt', datetime.now(timezone.utc))
         queue_operation_info = await self._client.update_request({**request})
         queue_operation_info['uniqueKey'] = request['uniqueKey']

@@ -302,7 +302,7 @@ async def reclaim_request(self, request: Dict, forefront: bool = False) -> Optio
             'id': (str, True),
             'uniqueKey': (str, True),
         })
-        self._last_activity = datetime.utcnow()
+        self._last_activity = datetime.now(timezone.utc)

         if request['id'] not in self._in_progress:
             logging.debug(f'Cannot reclaim request {request["id"]}, because it is not in progress!')
@@ -352,7 +352,8 @@ async def is_finished(self) -> bool:
         Returns:
             bool: `True` if all requests were already handled and there are no more left. `False` otherwise.
         """
-        if self._in_progress_count() > 0 and (datetime.utcnow() - self._last_activity).seconds > self._internal_timeout_seconds:
+        seconds_since_last_activity = (datetime.now(timezone.utc) - self._last_activity).seconds
+        if self._in_progress_count() > 0 and seconds_since_last_activity > self._internal_timeout_seconds:
             message = f'The request queue seems to be stuck for {self._internal_timeout_seconds}s, resetting internal state.'
             logging.warning(message)
             self._reset()
@@ -371,7 +372,7 @@ def _reset(self) -> None:
         self._assumed_total_count = 0
         self._assumed_handled_count = 0
         self._requests_cache.clear()
-        self._last_activity = datetime.utcnow()
+        self._last_activity = datetime.now(timezone.utc)

     def _cache_request(self, cache_key: str, queue_operation_info: Dict) -> None:
         self._requests_cache[cache_key] = {
@@ -382,7 +383,7 @@ def _cache_request(self, cache_key: str, queue_operation_info: Dict) -> None:
         }

     async def _queue_query_head(self, limit: int) -> Dict:
-        query_started_at = datetime.utcnow()
+        query_started_at = datetime.now(timezone.utc)

         list_head = await self._client.list_head(limit=limit)
         for request in list_head['items']:
@@ -391,10 +392,10 @@ async def _queue_query_head(self, limit: int) -> Dict:
                 continue
             self._queue_head_dict[request['id']] = request['id']
             self._cache_request(_unique_key_to_request_id(request['uniqueKey']), {
-                'request_id': request['id'],
-                'was_already_handled': False,
-                'was_already_present': True,
-                'unique_key': request['uniqueKey'],
+                'requestId': request['id'],
+                'wasAlreadyHandled': False,
+                'wasAlreadyPresent': True,
+                'uniqueKey': request['uniqueKey'],
             })

         # This is needed so that the next call to _ensureHeadIsNonEmpty() will fetch the queue head again.
@@ -440,7 +441,7 @@ async def _ensure_head_is_non_empty(self, ensure_consistency: bool = False, limi
         # If ensureConsistency=true then we must ensure that either:
         # - queueModifiedAt is older than queryStartedAt by at least API_PROCESSED_REQUESTS_DELAY_MILLIS
         # - hadMultipleClients=false and this.assumedTotalCount<=this.assumedHandledCount
-        is_database_consistent = (queue_head['queryStartedAt'] - queue_head['queueModifiedAt']
+        is_database_consistent = (queue_head['queryStartedAt'] - queue_head['queueModifiedAt'].replace(tzinfo=timezone.utc)
                                   ).seconds >= (API_PROCESSED_REQUESTS_DELAY_MILLIS // 1000)
         is_locally_consistent = not queue_head['hadMultipleClients'] and self._assumed_total_count <= self._assumed_handled_count
         # Consistent information from one source is enough to consider request queue finished.
@@ -459,7 +460,8 @@ async def _ensure_head_is_non_empty(self, ensure_consistency: bool = False, limi

         # If we are repeating for consistency then wait required time.
         if should_repeat_for_consistency:
-            delay_seconds = (API_PROCESSED_REQUESTS_DELAY_MILLIS // 1000) - (datetime.utcnow() - queue_head['queueModifiedAt']).seconds
+            delay_seconds = (API_PROCESSED_REQUESTS_DELAY_MILLIS // 1000) - \
+                (datetime.now(timezone.utc) - queue_head['queueModifiedAt']).seconds
             logging.info(f'Waiting for {delay_seconds}s before considering the queue as finished to ensure that the data is consistent.')
             await asyncio.sleep(delay_seconds)
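
The switch from datetime.utcnow() to datetime.now(timezone.utc), plus the .replace(tzinfo=timezone.utc) normalization above, matters because naive and timezone-aware datetimes cannot be mixed in arithmetic. A self-contained sketch of the failure mode the change avoids, using only the standard library and not taken from the commit itself:

from datetime import datetime, timezone

naive = datetime.utcnow()           # UTC wall-clock time, but tzinfo is None
aware = datetime.now(timezone.utc)  # the same instant, carrying tzinfo=timezone.utc

try:
    _ = aware - naive  # mixing the two styles is an error
except TypeError as exc:
    print(exc)  # can't subtract offset-naive and offset-aware datetimes

# Normalizing a naive UTC timestamp (e.g. one parsed from an API response) makes the arithmetic valid:
elapsed = aware - naive.replace(tzinfo=timezone.utc)
print(elapsed.total_seconds() >= 0)  # True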

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
+from apify import Actor
+
+from .conftest import ActorFactory
+
+
+class TestActorCreateProxyConfiguration:
+
+    async def test_create_proxy_configuration_basic(self, make_actor: ActorFactory) -> None:
+        async def main() -> None:
+            groups = ['SHADER']
+            country_code = 'US'
+
+            async with Actor:
+                proxy_configuration = await Actor.create_proxy_configuration(
+                    groups=groups,
+                    country_code=country_code,
+                )
+
+                assert proxy_configuration is not None
+                assert proxy_configuration._groups == groups
+                assert proxy_configuration._password is not None
+                assert proxy_configuration._country_code == country_code
+
+        actor = await make_actor('proxy-configuration', main_func=main)
+
+        run_result = await actor.call()
+        assert run_result is not None
+        assert run_result['status'] == 'SUCCEEDED'
+
+    async def test_create_proxy_configuration_complex(self, make_actor: ActorFactory) -> None:
+        async def main() -> None:
+            await Actor.init()
+
+            proxy_url_suffix = f'{Actor.config.proxy_password}@{Actor.config.proxy_hostname}:{Actor.config.proxy_port}'
+
+            proxy_configuration = await Actor.create_proxy_configuration(actor_proxy_input={
+                'useApifyProxy': True,
+            })
+            assert proxy_configuration is not None
+            assert await proxy_configuration.new_url() == f'http://auto:{proxy_url_suffix}'
+
+            groups = ['SHADER', 'BUYPROXIES94952']
+            country_code = 'US'
+            proxy_configuration = await Actor.create_proxy_configuration(actor_proxy_input={
+                'useApifyProxy': True,
+                'apifyProxyGroups': groups,
+                'apifyProxyCountry': country_code,
+            })
+            assert proxy_configuration is not None
+            assert await proxy_configuration.new_url() == f'http://groups-{"+".join(groups)},country-{country_code}:{proxy_url_suffix}'
+
+            await Actor.exit()
+
+        actor = await make_actor('proxy-configuration', main_func=main)
+
+        run_result = await actor.call()
+        assert run_result is not None
+        assert run_result['status'] == 'SUCCEEDED'
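
Judging from the assertions in this test, the Apify Proxy URL has the shape http://<username>:<password>@<hostname>:<port>, where the username is either 'auto' or comma-separated 'groups-...' and 'country-...' segments. A small sketch derived from those assertions; the helper name and the credentials are hypothetical, and this is not taken from the SDK source:

from typing import List, Optional


def compose_proxy_url(password: str, hostname: str, port: int,
                      groups: Optional[List[str]] = None,
                      country_code: Optional[str] = None) -> str:
    # Build the username part: 'auto' when no options are given,
    # otherwise 'groups-A+B' and 'country-XX' segments joined by commas.
    parts = []
    if groups:
        parts.append('groups-' + '+'.join(groups))
    if country_code:
        parts.append(f'country-{country_code}')
    username = ','.join(parts) if parts else 'auto'
    return f'http://{username}:{password}@{hostname}:{port}'


# Example with made-up credentials:
print(compose_proxy_url('pwd', 'proxy.example.com', 8000, ['SHADER', 'BUYPROXIES94952'], 'US'))
# -> http://groups-SHADER+BUYPROXIES94952,country-US:pwd@proxy.example.com:8000
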
Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
+from apify import Actor
+
+from ._utils import generate_unique_resource_name
+from .conftest import ActorFactory
+
+
+class TestActorPushData:
+
+    async def test_push_data(self, make_actor: ActorFactory) -> None:
+        desired_item_count = 100  # Also change inside main() if you're changing this
+
+        async def main() -> None:
+            desired_item_count = 100
+            async with Actor:
+                await Actor.push_data([{'id': i} for i in range(desired_item_count)])
+
+        actor = await make_actor('push-data', main_func=main)
+
+        run_result = await actor.call()
+
+        assert run_result is not None
+        assert run_result['status'] == 'SUCCEEDED'
+        list_page = await actor.last_run().dataset().list_items()
+        assert list_page.items[0]['id'] == 0
+        assert list_page.items[-1]['id'] == desired_item_count - 1
+        assert len(list_page.items) == list_page.count == desired_item_count
+
+
+class TestActorOpenDataset:
+    async def test_same_references_default(self, make_actor: ActorFactory) -> None:
+        async def main() -> None:
+            async with Actor:
+                dataset1 = await Actor.open_dataset()
+                dataset2 = await Actor.open_dataset()
+                assert dataset1 is dataset2
+
+        actor = await make_actor('dataset-same-ref-default', main_func=main)
+
+        run_result = await actor.call()
+        assert run_result is not None
+        assert run_result['status'] == 'SUCCEEDED'
+
+    async def test_same_references_named(self, make_actor: ActorFactory) -> None:
+        dataset_name = generate_unique_resource_name('dataset')
+
+        async def main() -> None:
+            async with Actor:
+                input_object = await Actor.get_input()
+                dataset_name = input_object['datasetName']
+                dataset1 = await Actor.open_dataset(dataset_name)
+                dataset2 = await Actor.open_dataset(dataset_name)
+                assert dataset1 is dataset2
+                await dataset1.drop()
+
+        actor = await make_actor('dataset-same-ref-named', main_func=main)
+
+        run_result = await actor.call(run_input={'datasetName': dataset_name})
+        assert run_result is not None
+        assert run_result['status'] == 'SUCCEEDED'
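
The assertions above read the whole dataset back in a single page via actor.last_run().dataset().list_items(). For larger datasets, a hedged sketch of paging through the items with the async Apify client; the helper is hypothetical and assumes the apify-client list_items(offset=..., limit=...) interface and its ListPage result:

from apify_client import ApifyClientAsync


async def read_all_items(dataset_id: str, token: str) -> list:
    # Page through the dataset 1000 items at a time and collect everything.
    dataset_client = ApifyClientAsync(token).dataset(dataset_id)
    items: list = []
    offset = 0
    while True:
        list_page = await dataset_client.list_items(offset=offset, limit=1000)
        items.extend(list_page.items)
        offset += len(list_page.items)
        if not list_page.items or offset >= list_page.total:
            break
    return items
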
Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,136 @@
+from apify import Actor
+
+from ._utils import generate_unique_resource_name
+from .conftest import ActorFactory
+
+
+class TestActorOpenKeyValueStore:
+    async def test_same_references_default(self, make_actor: ActorFactory) -> None:
+        async def main() -> None:
+            async with Actor:
+                kvs1 = await Actor.open_key_value_store()
+                kvs2 = await Actor.open_key_value_store()
+                assert kvs1 is kvs2
+
+        actor = await make_actor('kvs-same-ref-default', main_func=main)
+
+        run_result = await actor.call()
+        assert run_result is not None
+        assert run_result['status'] == 'SUCCEEDED'
+
+    async def test_same_references_named(self, make_actor: ActorFactory) -> None:
+        kvs_name = generate_unique_resource_name('key-value-store')
+
+        async def main() -> None:
+            async with Actor:
+                input_object = await Actor.get_input()
+                kvs_name = input_object['kvsName']
+                kvs1 = await Actor.open_key_value_store(kvs_name)
+                kvs2 = await Actor.open_key_value_store(kvs_name)
+                assert kvs1 is kvs2
+                await kvs1.drop()
+
+        actor = await make_actor('kvs-same-ref-named', main_func=main)
+
+        run_result = await actor.call(run_input={'kvsName': kvs_name})
+        assert run_result is not None
+        assert run_result['status'] == 'SUCCEEDED'
+
+
+class TestActorGetSetValue:
+    async def test_actor_get_set_value_simple(self, make_actor: ActorFactory) -> None:
+        async def main() -> None:
+            async with Actor:
+                await Actor.set_value('test', {'number': 123, 'string': 'a string', 'nested': {'test': 1}})
+                value = await Actor.get_value('test')
+                assert value['number'] == 123
+                assert value['string'] == 'a string'
+                assert value['nested']['test'] == 1
+
+        actor = await make_actor('actor-get-set-value', main_func=main)
+
+        run_result = await actor.call()
+        assert run_result is not None
+        assert run_result['status'] == 'SUCCEEDED'
+
+    async def test_actor_get_set_value_complex(self, make_actor: ActorFactory) -> None:
+        async def main_set() -> None:
+            async with Actor:
+                await Actor.set_value('test', {'number': 123, 'string': 'a string', 'nested': {'test': 1}})
+
+        actor_set = await make_actor('actor-set-value', main_func=main_set)
+
+        run_result_set = await actor_set.call()
+        assert run_result_set is not None
+        assert run_result_set['status'] == 'SUCCEEDED'
+        # Externally check if the value is present in key-value store
+        test_record = await actor_set.last_run().key_value_store().get_record('test')
+        assert test_record is not None
+        test_value = test_record['value']
+        assert test_value['number'] == 123
+        assert test_value['string'] == 'a string'
+        assert test_value['nested']['test'] == 1
+
+        async def main_get() -> None:
+            async with Actor:
+                input_object = await Actor.get_input()
+                # Access KVS of the previous 'set' run
+                kvs = await Actor.open_key_value_store(input_object['kvs-id'])
+                value = await kvs.get_value('test')
+                assert value['number'] == 123
+                assert value['string'] == 'a string'
+                assert value['nested']['test'] == 1
+
+        actor_get = await make_actor('actor-get-value', main_func=main_get)
+        default_kvs_info = await actor_set.last_run().key_value_store().get()
+        assert default_kvs_info is not None
+
+        run_result_get = await actor_get.call(run_input={'kvs-id': default_kvs_info['id']})
+        assert run_result_get is not None
+        assert run_result_get['status'] == 'SUCCEEDED'
+
+
+class TestActorGetInput:
+    async def test_actor_get_input(self, make_actor: ActorFactory) -> None:
+        actor_source_files = {
+            'INPUT_SCHEMA.json': """
+                {
+                    "title": "Actor get input test",
+                    "type": "object",
+                    "schemaVersion": 1,
+                    "properties": {
+                        "password": {
+                            "title": "Password",
+                            "type": "string",
+                            "description": "A secret, encrypted input field",
+                            "editor": "textfield",
+                            "isSecret": true
+                        }
+                    },
+                    "required": ["password"]
+                }
+            """,
+            'src/main.py': """
+                import asyncio
+                from apify import Actor
+
+                async def main():
+                    async with Actor:
+                        input_object = await Actor.get_input()
+                        assert input_object is not None
+                        assert input_object['number'] == 123
+                        assert input_object['string'] == 'a string'
+                        assert input_object['nested']['test'] == 1
+                        assert input_object['password'] == 'very secret'
+            """,
+        }
+        actor = await make_actor('actor-get-input', source_files=actor_source_files)
+
+        run_result = await actor.call(run_input={
+            'number': 123,
+            'string': 'a string',
+            'nested': {'test': 1},
+            'password': 'very secret',
+        })
+        assert run_result is not None
+        assert run_result['status'] == 'SUCCEEDED'
