Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions src/apify/_crypto.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

import base64
import hashlib
import hmac
import string
from typing import Any

from cryptography.exceptions import InvalidTag as InvalidTagException
Expand Down Expand Up @@ -153,3 +156,38 @@ def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input_data: Any) -> An
)

return input_data


CHARSET = string.digits + string.ascii_letters


def encode_base62(num: int) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method alone is perfect for writing unit test for. Could you please add one parametrized test with couple of input values.

https://docs.pytest.org/en/stable/how-to/parametrize.html#pytest-mark-parametrize

"""Encode the given number to base62."""
if num == 0:
return CHARSET[0]

res = ''
while num > 0:
num, remainder = divmod(num, 62)
res = CHARSET[remainder] + res
return res


@ignore_docs
def create_hmac_signature(secret_key: str, message: str) -> str:
"""Generates an HMAC signature and encodes it using Base62. Base62 encoding reduces the signature length.

HMAC signature is truncated to 30 characters to make it shorter.

Args:
secret_key (str): Secret key used for signing signatures
message (str): Message to be signed

Returns:
str: Base62 encoded signature
"""
signature = hmac.new(secret_key.encode('utf-8'), message.encode('utf-8'), hashlib.sha256).hexdigest()[:30]

decimal_signature = int(signature, 16)

return encode_base62(decimal_signature)
9 changes: 8 additions & 1 deletion src/apify/apify_storage_client/_key_value_store_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from crawlee.storage_clients._base import BaseKeyValueStoreClient
from crawlee.storage_clients.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord

from apify._crypto import create_hmac_signature

if TYPE_CHECKING:
from collections.abc import AsyncIterator
from contextlib import AbstractAsyncContextManager
Expand Down Expand Up @@ -90,5 +92,10 @@ async def get_public_url(self, key: str) -> str:
key: The key for which the URL should be generated.
"""
public_api_url = self._api_public_base_url
public_url = f'{public_api_url}/v2/key-value-stores/{self._client.resource_id}/records/{key}'

url_signing_secret_key = getattr(self.storage_object, 'url_signing_secret_key', None)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Strictly speaking, there is no guarantee if there is an attribute called url_signing_secret_key on the storage object that it will actually be what you expect. Could you use an isinstance check instead to make sure that the storage_object is of the correct type?

if url_signing_secret_key:
public_url += f'?signature={create_hmac_signature(url_signing_secret_key, key)}'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please use yarl.URL to build the URL? It is already a dependency of Crawlee.


return f'{public_api_url}/v2/key-value-stores/{self._client.resource_id}/records/{key}'
return public_url
15 changes: 11 additions & 4 deletions tests/integration/test_actor_key_value_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from ._utils import generate_unique_resource_name
from apify import Actor
from apify._crypto import create_hmac_signature

if TYPE_CHECKING:
import pytest
Expand Down Expand Up @@ -210,10 +211,16 @@ async def main() -> None:
default_store_id = Actor.config.default_key_value_store_id

store = await Actor.open_key_value_store()
record_url = await cast(KeyValueStoreClient, store._resource_client).get_public_url('dummy')
print(record_url)

assert record_url == f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/dummy'
record_key = 'dummy'
record_url = await cast(KeyValueStoreClient, store._resource_client).get_public_url(record_key)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, I don't think we need to access KeyValueStoreClient._resource_client anymore - just KeyValueStore.get_public_url should be enough.

url_signing_secret_key = cast(str, getattr(store.storage_object, 'url_signing_secret_key', None))
signature = create_hmac_signature(url_signing_secret_key, record_key)

assert url_signing_secret_key is not None
assert (
record_url
== f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/{record_key}?signature={signature}'
)

actor = await make_actor(label='kvs-get-public-url', main_func=main)
run_result = await run_actor(actor)
Expand Down
32 changes: 31 additions & 1 deletion tests/unit/test_crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@

import pytest

from apify._crypto import _load_public_key, crypto_random_object_id, load_private_key, private_decrypt, public_encrypt
from apify._crypto import (
_load_public_key,
create_hmac_signature,
crypto_random_object_id,
encode_base62,
load_private_key,
private_decrypt,
public_encrypt,
)

# NOTE: Uses the same keys as in:
# https://github.com/apify/apify-shared-js/blob/master/test/crypto.test.ts
Expand Down Expand Up @@ -105,3 +113,25 @@ def test_crypto_random_object_id_length_and_charset() -> None:
long_random_object_id = crypto_random_object_id(1000)
for char in long_random_object_id:
assert char in 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'


@pytest.mark.parametrize(('test_input', 'expected'), [(0, '0'), (10, 'a'), (999999999, '15FTGf')])
def test_encode_base62(test_input: int, expected: str) -> None:
assert encode_base62(test_input) == expected


# This test ensures compatibility with the JavaScript version of the same method.
# https://github.com/apify/apify-shared-js/blob/master/packages/utilities/src/hmac.ts
def test_create_valid_hmac_signature() -> None:
# This test uses the same secret key and message as in JS tests.
secret_key = 'hmac-secret-key'
message = 'hmac-message-to-be-authenticated'
assert create_hmac_signature(secret_key, message) == 'pcVagAsudj8dFqdlg7mG'


def test_create_same_hmac() -> None:
# This test uses the same secret key and message as in JS tests.
secret_key = 'hmac-same-secret-key'
message = 'hmac-same-message-to-be-authenticated'
assert create_hmac_signature(secret_key, message) == 'FYMcmTIm3idXqleF1Sw5'
assert create_hmac_signature(secret_key, message) == 'FYMcmTIm3idXqleF1Sw5'
Loading