Skip to content

Commit c34b073

Browse files
committed
feat: add Dataset.create_items_public_url and KeyValueStore.create_keys_public_url
1 parent 8d75b7d commit c34b073

File tree

7 files changed

+509
-16
lines changed

7 files changed

+509
-16
lines changed

src/apify_client/_utils.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22

33
import asyncio
44
import base64
5+
import hashlib
6+
import hmac
57
import json
68
import random
9+
import string
710
import time
811
from collections.abc import Callable
912
from http import HTTPStatus
@@ -149,3 +152,59 @@ def encode_key_value_store_record_value(value: Any, content_type: str | None = N
149152
value = json.dumps(value, ensure_ascii=False, indent=2, allow_nan=False, default=str).encode('utf-8')
150153

151154
return (value, content_type)
155+
156+
157+
# TODO: will be removed once create_hmac_signature is moved to apify_shared.utils
158+
# https://github.com/apify/apify-shared-python/pull/44
159+
CHARSET = string.digits + string.ascii_letters


def encode_base62(num: int) -> str:
    """Encode a non-negative integer as a base62 string.

    Digits are taken from `CHARSET` (`0-9`, then `a-z`, then `A-Z`) and emitted most significant digit first.

    Args:
        num: The non-negative integer to encode.

    Returns:
        The base62 representation of `num`; zero is encoded as `'0'`.

    Raises:
        ValueError: If `num` is negative. The digit loop never runs for negative numbers, so without this
            check the function would silently return an empty string.
    """
    if num < 0:
        raise ValueError(f'Cannot base62-encode a negative number: {num}')

    if num == 0:
        return CHARSET[0]

    base = len(CHARSET)
    digits = []
    while num > 0:
        num, remainder = divmod(num, base)
        digits.append(CHARSET[remainder])

    # Digits were produced least significant first, so reverse them for the final string.
    return ''.join(reversed(digits))
172+
173+
174+
def create_hmac_signature(secret_key: str, message: str) -> str:
    """Generate an HMAC-SHA256 signature of a message and encode it using base62.

    Only the first 30 hexadecimal characters of the digest are kept, and the base62 encoding shortens
    the resulting signature further.

    Args:
        secret_key (str): Secret key used for signing.
        message (str): Message to be signed.

    Returns:
        str: Base62 encoded signature.
    """
    key_bytes = secret_key.encode('utf-8')
    message_bytes = message.encode('utf-8')

    hex_digest = hmac.new(key_bytes, message_bytes, hashlib.sha256).hexdigest()
    # Keep only the leading 30 hex characters before converting to an integer.
    truncated_hex = hex_digest[:30]

    return encode_base62(int(truncated_hex, 16))
191+
192+
193+
def create_storage_signature(
    resource_id: str, url_signing_secret_key: str, expires_in_millis: int | None, version: int = 0
) -> str:
    """Create a storage signature for a resource, usable for building signed URLs to that resource.

    The message `'{version}.{expires_at}.{resource_id}'` is signed with the given secret key, and the
    result is packed together with the version and expiration time into a URL-safe base64 string.

    Note: `expires_in_millis` is optional. When it is `None` (or `0`), the expiration time is encoded
    as `0`, meaning the signature does not expire.
    """
    if expires_in_millis:
        # Absolute expiration timestamp in milliseconds since the epoch.
        expires_at = int(time.time() * 1000) + expires_in_millis
    else:
        expires_at = 0

    signed_part = create_hmac_signature(url_signing_secret_key, f'{version}.{expires_at}.{resource_id}')

    payload = f'{version}.{expires_at}.{signed_part}'
    return base64.urlsafe_b64encode(payload.encode()).decode('utf-8')

src/apify_client/clients/resource_clients/dataset.py

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
import warnings
44
from contextlib import asynccontextmanager, contextmanager
55
from typing import TYPE_CHECKING, Any
6+
from urllib.parse import urlencode, urlparse, urlunparse
67

78
from apify_shared.models import ListPage
89
from apify_shared.utils import filter_out_none_values_recursively, ignore_docs
910

1011
from apify_client._errors import ApifyApiError
11-
from apify_client._utils import catch_not_found_or_throw, pluck_data
12+
from apify_client._utils import catch_not_found_or_throw, create_storage_signature, pluck_data
1213
from apify_client.clients.base import ResourceClient, ResourceClientAsync
1314

1415
if TYPE_CHECKING:
@@ -571,6 +572,67 @@ def get_statistics(self) -> dict | None:
571572

572573
return None
573574

575+
def create_items_public_url(
576+
self,
577+
*,
578+
offset: int | None = None,
579+
limit: int | None = None,
580+
clean: bool | None = None,
581+
desc: bool | None = None,
582+
fields: list[str] | None = None,
583+
omit: list[str] | None = None,
584+
unwind: list[str] | None = None,
585+
skip_empty: bool | None = None,
586+
skip_hidden: bool | None = None,
587+
flatten: list[str] | None = None,
588+
view: str | None = None,
589+
expires_in_millis: int | None = None,
590+
) -> str:
591+
"""Generate a URL that can be used to access dataset items.
592+
593+
If the client has permission to access the dataset's URL signing key,
594+
the URL will include a signature to verify its authenticity.
595+
596+
You can optionally control how long the signed URL should be valid using the `expires_in_millis` option.
597+
This value sets the expiration duration in milliseconds from the time the URL is generated.
598+
If not provided, the URL will not expire.
599+
600+
Any other options (like `limit` or `offset`) will be included as query parameters in the URL.
601+
602+
Returns:
603+
The public dataset items URL.
604+
"""
605+
dataset = self.get()
606+
607+
request_params = self._params(
608+
offset=offset,
609+
limit=limit,
610+
desc=desc,
611+
clean=clean,
612+
fields=fields,
613+
omit=omit,
614+
unwind=unwind,
615+
skipEmpty=skip_empty,
616+
skipHidden=skip_hidden,
617+
flatten=flatten,
618+
view=view,
619+
)
620+
621+
if dataset and 'urlSigningSecretKey' in dataset:
622+
signature = create_storage_signature(
623+
resource_id=dataset['id'],
624+
url_signing_secret_key=dataset['urlSigningSecretKey'],
625+
expires_in_millis=expires_in_millis,
626+
)
627+
request_params['signature'] = signature
628+
629+
items_public_url = urlparse(self._url('items'))
630+
filtered_params = {k: v for k, v in request_params.items() if v is not None}
631+
if filtered_params:
632+
items_public_url = items_public_url._replace(query=urlencode(filtered_params))
633+
634+
return urlunparse(items_public_url)
635+
574636

575637
class DatasetClientAsync(ResourceClientAsync):
576638
"""Async sub-client for manipulating a single dataset."""
@@ -1027,3 +1089,64 @@ async def get_statistics(self) -> dict | None:
10271089
catch_not_found_or_throw(exc)
10281090

10291091
return None
1092+
1093+
async def create_items_public_url(
1094+
self,
1095+
*,
1096+
offset: int | None = None,
1097+
limit: int | None = None,
1098+
clean: bool | None = None,
1099+
desc: bool | None = None,
1100+
fields: list[str] | None = None,
1101+
omit: list[str] | None = None,
1102+
unwind: list[str] | None = None,
1103+
skip_empty: bool | None = None,
1104+
skip_hidden: bool | None = None,
1105+
flatten: list[str] | None = None,
1106+
view: str | None = None,
1107+
expires_in_millis: int | None = None,
1108+
) -> str:
1109+
"""Generate a URL that can be used to access dataset items.
1110+
1111+
If the client has permission to access the dataset's URL signing key,
1112+
the URL will include a signature to verify its authenticity.
1113+
1114+
You can optionally control how long the signed URL should be valid using the `expires_in_millis` option.
1115+
This value sets the expiration duration in milliseconds from the time the URL is generated.
1116+
If not provided, the URL will not expire.
1117+
1118+
Any other options (like `limit` or `offset`) will be included as query parameters in the URL.
1119+
1120+
Returns:
1121+
The public dataset items URL.
1122+
"""
1123+
dataset = await self.get()
1124+
1125+
request_params = self._params(
1126+
offset=offset,
1127+
limit=limit,
1128+
desc=desc,
1129+
clean=clean,
1130+
fields=fields,
1131+
omit=omit,
1132+
unwind=unwind,
1133+
skipEmpty=skip_empty,
1134+
skipHidden=skip_hidden,
1135+
flatten=flatten,
1136+
view=view,
1137+
)
1138+
1139+
if dataset and 'urlSigningSecretKey' in dataset:
1140+
signature = create_storage_signature(
1141+
resource_id=dataset['id'],
1142+
url_signing_secret_key=dataset['urlSigningSecretKey'],
1143+
expires_in_millis=expires_in_millis,
1144+
)
1145+
request_params['signature'] = signature
1146+
1147+
items_public_url = urlparse(self._url('items'))
1148+
filtered_params = {k: v for k, v in request_params.items() if v is not None}
1149+
if filtered_params:
1150+
items_public_url = items_public_url._replace(query=urlencode(filtered_params))
1151+
1152+
return urlunparse(items_public_url)

src/apify_client/clients/resource_clients/key_value_store.py

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,17 @@
44
from contextlib import asynccontextmanager, contextmanager
55
from http import HTTPStatus
66
from typing import TYPE_CHECKING, Any
7+
from urllib.parse import urlencode, urlparse, urlunparse
78

89
from apify_shared.utils import filter_out_none_values_recursively, ignore_docs, parse_date_fields
910

1011
from apify_client._errors import ApifyApiError
11-
from apify_client._utils import catch_not_found_or_throw, encode_key_value_store_record_value, pluck_data
12+
from apify_client._utils import (
13+
catch_not_found_or_throw,
14+
create_storage_signature,
15+
encode_key_value_store_record_value,
16+
pluck_data,
17+
)
1218
from apify_client.clients.base import ResourceClient, ResourceClientAsync
1319

1420
if TYPE_CHECKING:
@@ -287,6 +293,54 @@ def delete_record(self, key: str) -> None:
287293
timeout_secs=_SMALL_TIMEOUT,
288294
)
289295

296+
def create_keys_public_url(
297+
self,
298+
*,
299+
limit: int | None = None,
300+
exclusive_start_key: str | None = None,
301+
collection: str | None = None,
302+
prefix: str | None = None,
303+
expires_in_millis: int | None = None,
304+
) -> str:
305+
"""Generate a URL that can be used to access key-value store keys.
306+
307+
If the client has permission to access the key-value store's URL signing key,
308+
the URL will include a signature to verify its authenticity.
309+
310+
You can optionally control how long the signed URL should be valid using the `expires_in_millis` option.
311+
This value sets the expiration duration in milliseconds from the time the URL is generated.
312+
If not provided, the URL will not expire.
313+
314+
Any other options (like `limit` or `prefix`) will be included as query parameters in the URL.
315+
316+
Returns:
317+
The public key-value store keys URL.
318+
"""
319+
store = self.get()
320+
321+
request_params = self._params(
322+
limit=limit,
323+
exclusive_start_key=exclusive_start_key,
324+
collection=collection,
325+
prefix=prefix,
326+
)
327+
328+
if store and 'urlSigningSecretKey' in store:
329+
signature = create_storage_signature(
330+
resource_id=store['id'],
331+
url_signing_secret_key=store['urlSigningSecretKey'],
332+
expires_in_millis=expires_in_millis,
333+
)
334+
request_params['signature'] = signature
335+
336+
keys_public_url = urlparse(self._url('keys'))
337+
338+
filtered_params = {k: v for k, v in request_params.items() if v is not None}
339+
if filtered_params:
340+
keys_public_url = keys_public_url._replace(query=urlencode(filtered_params))
341+
342+
return urlunparse(keys_public_url)
343+
290344

291345
class KeyValueStoreClientAsync(ResourceClientAsync):
292346
"""Async sub-client for manipulating a single key-value store."""
@@ -533,3 +587,52 @@ async def delete_record(self, key: str) -> None:
533587
params=self._params(),
534588
timeout_secs=_SMALL_TIMEOUT,
535589
)
590+
591+
async def create_keys_public_url(
592+
self,
593+
*,
594+
limit: int | None = None,
595+
exclusive_start_key: str | None = None,
596+
collection: str | None = None,
597+
prefix: str | None = None,
598+
expires_in_millis: int | None = None,
599+
) -> str:
600+
"""Generate a URL that can be used to access key-value store keys.
601+
602+
If the client has permission to access the key-value store's URL signing key,
603+
the URL will include a signature to verify its authenticity.
604+
605+
You can optionally control how long the signed URL should be valid using the `expires_in_millis` option.
606+
This value sets the expiration duration in milliseconds from the time the URL is generated.
607+
If not provided, the URL will not expire.
608+
609+
Any other options (like `limit` or `prefix`) will be included as query parameters in the URL.
610+
611+
Returns:
612+
The public key-value store keys URL.
613+
"""
614+
store = await self.get()
615+
616+
keys_public_url = urlparse(self._url('keys'))
617+
618+
request_params = self._params(
619+
limit=limit,
620+
exclusive_start_key=exclusive_start_key,
621+
collection=collection,
622+
prefix=prefix,
623+
)
624+
625+
if store and 'urlSigningSecretKey' in store:
626+
signature = create_storage_signature(
627+
resource_id=store['id'],
628+
url_signing_secret_key=store['urlSigningSecretKey'],
629+
expires_in_millis=expires_in_millis,
630+
)
631+
request_params['signature'] = signature
632+
633+
keys_public_url = urlparse(self._url('keys'))
634+
filtered_params = {k: v for k, v in request_params.items() if v is not None}
635+
if filtered_params:
636+
keys_public_url = keys_public_url._replace(query=urlencode(filtered_params))
637+
638+
return urlunparse(keys_public_url)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import secrets
2+
import string
3+
4+
5+
def random_string(length: int = 10) -> str:
    """Return a random string of `length` ASCII letters, picked with the `secrets` module."""
    alphabet = string.ascii_letters
    picked = [secrets.choice(alphabet) for _ in range(length)]
    return ''.join(picked)
7+
8+
9+
def random_resource_name(resource: str) -> str:
    """Return a name of the form `python-client-test-<resource>-<suffix>` with a random 5-letter suffix."""
    suffix = random_string(5)
    return f'python-client-test-{resource}-{suffix}'

0 commit comments

Comments
 (0)