diff --git a/src/apify/_crypto.py b/src/apify/_crypto.py
index d3257582..3071efaa 100644
--- a/src/apify/_crypto.py
+++ b/src/apify/_crypto.py
@@ -1,6 +1,9 @@
 from __future__ import annotations
 
 import base64
+import hashlib
+import hmac
+import string
 from typing import Any
 
 from cryptography.exceptions import InvalidTag as InvalidTagException
@@ -153,3 +156,51 @@ def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input_data: Any) -> An
                 )
 
     return input_data
+
+
+CHARSET = string.digits + string.ascii_letters
+
+
+def encode_base62(num: int) -> str:
+    """Encode the given non-negative integer to base62.
+
+    Args:
+        num: Non-negative integer to encode.
+
+    Returns:
+        The base62 representation of `num`, using digits, then lowercase, then uppercase ASCII letters.
+
+    Raises:
+        ValueError: If `num` is negative.
+    """
+    if num < 0:
+        raise ValueError('Cannot encode a negative number to base62.')
+
+    if num == 0:
+        return CHARSET[0]
+
+    res = ''
+    while num > 0:
+        num, remainder = divmod(num, 62)
+        res = CHARSET[remainder] + res
+    return res
+
+
+@ignore_docs
+def create_hmac_signature(secret_key: str, message: str) -> str:
+    """Generate an HMAC-SHA256 signature and encode it using Base62.
+
+    The hex digest is truncated to its first 30 characters before the Base62 conversion to keep the signature short.
+
+    Args:
+        secret_key: Secret key used for signing signatures.
+        message: Message to be signed.
+
+    Returns:
+        Base62 encoded signature.
+    """
+    signature = hmac.new(secret_key.encode('utf-8'), message.encode('utf-8'), hashlib.sha256).hexdigest()[:30]
+
+    decimal_signature = int(signature, 16)
+
+    return encode_base62(decimal_signature)
diff --git a/src/apify/apify_storage_client/_key_value_store_client.py b/src/apify/apify_storage_client/_key_value_store_client.py
index aeeb251a..49883b3f 100644
--- a/src/apify/apify_storage_client/_key_value_store_client.py
+++ b/src/apify/apify_storage_client/_key_value_store_client.py
@@ -4,10 +4,13 @@
 from typing import TYPE_CHECKING, Any
 
 from typing_extensions import override
+from yarl import URL
 
 from crawlee.storage_clients._base import KeyValueStoreClient as BaseKeyValueStoreClient
 from crawlee.storage_clients.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord
 
+from apify._crypto import create_hmac_signature
+
 if TYPE_CHECKING:
     from collections.abc import AsyncIterator
     from contextlib import AbstractAsyncContextManager
@@ -89,6 +92,18 @@ async def get_public_url(self, key: str) -> str:
         Args:
             key: The key for which the URL should be generated.
         """
-        public_api_url = self._api_public_base_url
+        if self._client.resource_id is None:
+            raise ValueError('resource_id cannot be None when generating a public URL')
+
+        public_url = (
+            URL(self._api_public_base_url) / 'v2' / 'key-value-stores' / self._client.resource_id / 'records' / key
+        )
+
+        key_value_store = await self.get()
+
+        if key_value_store is not None and isinstance(key_value_store.model_extra, dict):
+            url_signing_secret_key = key_value_store.model_extra.get('urlSigningSecretKey')
+            if url_signing_secret_key:
+                public_url = public_url.with_query(signature=create_hmac_signature(url_signing_secret_key, key))
 
-        return f'{public_api_url}/v2/key-value-stores/{self._client.resource_id}/records/{key}'
+        return str(public_url)
diff --git a/tests/integration/test_actor_key_value_store.py b/tests/integration/test_actor_key_value_store.py
index 6ed64123..6b6dd767 100644
--- a/tests/integration/test_actor_key_value_store.py
+++ b/tests/integration/test_actor_key_value_store.py
@@ -201,19 +201,28 @@ async def test_generate_public_url_for_kvs_record(
     run_actor: RunActorFunction,
 ) -> None:
     async def main() -> None:
-        from typing import cast
-
-        from apify.apify_storage_client._key_value_store_client import KeyValueStoreClient
+        from apify._crypto import create_hmac_signature
 
         async with Actor:
             public_api_url = Actor.config.api_public_base_url
             default_store_id = Actor.config.default_key_value_store_id
+            record_key = 'public-record-key'
 
             store = await Actor.open_key_value_store()
-            record_url = await cast(KeyValueStoreClient, store._resource_client).get_public_url('dummy')
-            print(record_url)
 
-            assert record_url == f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/dummy'
+            assert isinstance(store.storage_object.model_extra, dict)
+            url_signing_secret_key = store.storage_object.model_extra.get('urlSigningSecretKey')
+            assert url_signing_secret_key is not None
+
+            await store.set_value(record_key, {'exposedData': 'test'}, 'application/json')
+
+            record_url = await store.get_public_url(record_key)
+
+            signature = create_hmac_signature(url_signing_secret_key, record_key)
+            assert (
+                record_url
+                == f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/{record_key}?signature={signature}'
+            )
 
     actor = await make_actor(label='kvs-get-public-url', main_func=main)
     run_result = await run_actor(actor)
diff --git a/tests/unit/test_crypto.py b/tests/unit/test_crypto.py
index 24da3b6b..1dead9c0 100644
--- a/tests/unit/test_crypto.py
+++ b/tests/unit/test_crypto.py
@@ -4,7 +4,15 @@
 
 import pytest
 
-from apify._crypto import _load_public_key, crypto_random_object_id, load_private_key, private_decrypt, public_encrypt
+from apify._crypto import (
+    _load_public_key,
+    create_hmac_signature,
+    crypto_random_object_id,
+    encode_base62,
+    load_private_key,
+    private_decrypt,
+    public_encrypt,
+)
 
 # NOTE: Uses the same keys as in:
 # https://github.com/apify/apify-shared-js/blob/master/test/crypto.test.ts
@@ -105,3 +113,30 @@ def test_crypto_random_object_id_length_and_charset() -> None:
     long_random_object_id = crypto_random_object_id(1000)
     for char in long_random_object_id:
         assert char in 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'
+
+
+@pytest.mark.parametrize(('test_input', 'expected'), [(0, '0'), (10, 'a'), (999999999, '15FTGf')])
+def test_encode_base62(test_input: int, expected: str) -> None:
+    assert encode_base62(test_input) == expected
+
+
+def test_encode_base62_rejects_negative_number() -> None:
+    with pytest.raises(ValueError, match='negative'):
+        encode_base62(-1)
+
+
+# This test ensures compatibility with the JavaScript version of the same method.
+# https://github.com/apify/apify-shared-js/blob/master/packages/utilities/src/hmac.ts
+def test_create_valid_hmac_signature() -> None:
+    # This test uses the same secret key and message as in JS tests.
+    secret_key = 'hmac-secret-key'
+    message = 'hmac-message-to-be-authenticated'
+    assert create_hmac_signature(secret_key, message) == 'pcVagAsudj8dFqdlg7mG'
+
+
+def test_create_same_hmac() -> None:
+    # This test uses the same secret key and message as in JS tests.
+    secret_key = 'hmac-same-secret-key'
+    message = 'hmac-same-message-to-be-authenticated'
+    assert create_hmac_signature(secret_key, message) == 'FYMcmTIm3idXqleF1Sw5'
+    assert create_hmac_signature(secret_key, message) == 'FYMcmTIm3idXqleF1Sw5'
diff --git a/uv.lock b/uv.lock
index 436deb0a..a7843e97 100644
--- a/uv.lock
+++ b/uv.lock
@@ -597,7 +597,7 @@ toml = [
 
 [[package]]
 name = "crawlee"
-version = "0.6.1"
+version = "0.6.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "browserforge" },
@@ -617,9 +617,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "yarl" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/b6/1c/102480fb8460eb317e39b11961fe63422af0df2c70e61c6c2c9f702624c9/crawlee-0.6.1.tar.gz", hash = "sha256:5af9d5ef3eaafdba4cfa73224f09b503302a3b00522288ba4f40a9ad90fdedef", size = 23642433 }
+sdist = { url = "https://files.pythonhosted.org/packages/dd/55/466dca83ccc8eb2769b93d35b1077af62fc8928989efb7f9f08b2a11d139/crawlee-0.6.3.tar.gz", hash = "sha256:d1dfcbbeebaa20ef6a762ad2407f7969ce57fd59f74298b8e2d7e40a1593a199", size = 23646865 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3a/60/ad20c1b9cd68b52e70ce00008f76660417db2b68e64883d053086a8b3a92/crawlee-0.6.1-py3-none-any.whl", hash = "sha256:49780603b7ac76508a30e1473d5d6cf161e08c63c31855a93e22c8a807e1d9c3", size = 240393 },
+    { url = "https://files.pythonhosted.org/packages/f2/19/4c479175d9c7830b911f81495139f08a75f33a692a490189ab61f6fb73c2/crawlee-0.6.3-py3-none-any.whl", hash = "sha256:11a745c3a858c7098af4046165b6246fd2187f7bdd1180d45ed7b6e8ebc20216", size = 243799 },
 ]
 
 [[package]]