Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions src/apify/_crypto.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

import base64
import hashlib
import hmac
from typing import Any

from cryptography.exceptions import InvalidTag as InvalidTagException
Expand Down Expand Up @@ -153,3 +155,37 @@ def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input_data: Any) -> An
)

return input_data


CHARSET = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Preferably use predefined standard string constants: string.digits+string.ascii_letters
https://docs.python.org/3/library/string.html#string-constants



def encode_base62(num: int) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method alone is perfect for writing unit test for. Could you please add one parametrized test with couple of input values.

https://docs.pytest.org/en/stable/how-to/parametrize.html#pytest-mark-parametrize

"""Encode the given number to base62."""
if num == 0:
return CHARSET[0]

res = ''
while num > 0:
num, remainder = divmod(num, 62)
res = CHARSET[remainder] + res
return res


# createHmacSignature
@ignore_docs
def create_hmac_signature(secret_key: str, message: str) -> str:
"""Generates an HMAC signature and encodes it using Base62. Base62 encoding reduces the signature length.

Args:
secret_key (str): Secret key used for signing signatures
message (str): Message to be signed

Returns:
str: Base62 encoded signature
"""
signature = hmac.new(secret_key.encode('utf-8'), message.encode('utf-8'), hashlib.sha256).hexdigest()[:30]

decimal_signature = int(signature, 16)

return encode_base62(decimal_signature)
11 changes: 9 additions & 2 deletions src/apify/apify_storage_client/_key_value_store_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@

from typing_extensions import override

from crawlee.storage_clients._base import BaseKeyValueStoreClient
from crawlee.storage_clients._base import KeyValueStoreClient as BaseKeyValueStoreClient
from crawlee.storage_clients.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord

from apify._crypto import create_hmac_signature

if TYPE_CHECKING:
from collections.abc import AsyncIterator
from contextlib import AbstractAsyncContextManager
Expand Down Expand Up @@ -90,5 +92,10 @@ async def get_public_url(self, key: str) -> str:
key: The key for which the URL should be generated.
"""
public_api_url = self._api_public_base_url
public_url = f'{public_api_url}/v2/key-value-stores/{self._client.resource_id}/records/{key}'

url_signing_secret_key = getattr(self.storage_object, 'url_signing_secret_key', None) # type: ignore[attr-defined]
Copy link
Contributor Author

@danpoletaev danpoletaev Feb 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit worried that url_signing_secret_key isn't being properly transformed from camelCase? In the API response, it shows up as urlSigningSecretKey

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am confused about the ignore. I guess it is complaining about self.storage_object in such case we should not ignore it as it could raise an exception in cases where self does not have storage_object

I guess you can remove it and wait for your other change in crawlee to be merged and that should silence the warning.

if url_signing_secret_key:
public_url += f'?signature={create_hmac_signature(url_signing_secret_key, key)}'

return f'{public_api_url}/v2/key-value-stores/{self._client.resource_id}/records/{key}'
return public_url
15 changes: 11 additions & 4 deletions tests/integration/test_actor_key_value_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from ._utils import generate_unique_resource_name
from apify import Actor
from apify._crypto import create_hmac_signature

if TYPE_CHECKING:
import pytest
Expand Down Expand Up @@ -210,10 +211,16 @@ async def main() -> None:
default_store_id = Actor.config.default_key_value_store_id

store = await Actor.open_key_value_store()
record_url = await cast(KeyValueStoreClient, store._resource_client).get_public_url('dummy')
print(record_url)

assert record_url == f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/dummy'
record_key = 'dummy'
record_url = await cast(KeyValueStoreClient, store._resource_client).get_public_url(record_key)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, I don't think we need to access KeyValueStoreClient._resource_client anymore - just KeyValueStore.get_public_url should be enough.

url_signing_secret_key = cast(str, getattr(store.storage_object, 'url_signing_secret_key', None))
signature = create_hmac_signature(url_signing_secret_key, record_key)

assert url_signing_secret_key is not None
assert (
record_url
== f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/{record_key}?signature={signature}'
)

actor = await make_actor(label='kvs-get-public-url', main_func=main)
run_result = await run_actor(actor)
Expand Down
26 changes: 25 additions & 1 deletion tests/unit/test_crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@

import pytest

from apify._crypto import _load_public_key, crypto_random_object_id, load_private_key, private_decrypt, public_encrypt
from apify._crypto import (
_load_public_key,
create_hmac_signature,
crypto_random_object_id,
load_private_key,
private_decrypt,
public_encrypt,
)

# NOTE: Uses the same keys as in:
# https://github.com/apify/apify-shared-js/blob/master/test/crypto.test.ts
Expand Down Expand Up @@ -105,3 +112,20 @@ def test_crypto_random_object_id_length_and_charset() -> None:
long_random_object_id = crypto_random_object_id(1000)
for char in long_random_object_id:
assert char in 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'


# Check if the method is compatible with js version of the same method in:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what is the comment telling the reader. Could you rephrase it please. Currently it looks like a remainder to the author of the change to do something

# https://github.com/apify/apify-shared-js/blob/master/packages/utilities/src/hmac.ts
def test_create_valid_hmac_signature() -> None:
# This test uses the same secret key and message as in JS tests.
secret_key = 'hmac-secret-key'
message = 'hmac-message-to-be-authenticated'
assert create_hmac_signature(secret_key, message) == 'pcVagAsudj8dFqdlg7mG'


def test_create_same_hmac() -> None:
# This test uses the same secret key and message as in JS tests.
secret_key = 'hmac-same-secret-key'
message = 'hmac-same-message-to-be-authenticated'
for _ in range(5):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why stress testing it? Is there some non-determinism involved?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no non-determinism involved, but I'd rather be sure, that for same input we get same output.
Refactored test to avoid "stress testing"

assert create_hmac_signature(secret_key, message) == 'FYMcmTIm3idXqleF1Sw5'
Loading