Skip to content

Commit 6eb1630

Browse files
drobnikjfnesveda
andauthored
feat: Decrypt input secrets if there are some (#45)
Co-authored-by: František Nesveda <[email protected]>
1 parent 7f4dcad commit 6eb1630

File tree

12 files changed

+262
-22
lines changed

12 files changed

+262
-22
lines changed

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
'websockets ~= 10.4',
6060
'aiofiles ~= 22.1.0',
6161
'aioshutil ~= 1.2',
62+
'cryptography ~= 39.0.0',
6263
],
6364
extras_require={
6465
'dev': [

src/apify/_crypto.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
import base64
2+
import secrets
3+
from typing import Any
4+
5+
from cryptography.exceptions import InvalidTag as InvalidTagException
6+
from cryptography.hazmat.primitives import hashes, serialization
7+
from cryptography.hazmat.primitives.asymmetric import padding, rsa
8+
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
9+
10+
from .consts import ENCRYPTED_INPUT_VALUE_REGEXP
11+
12+
ENCRYPTION_KEY_LENGTH = 32
13+
ENCRYPTION_IV_LENGTH = 16
14+
ENCRYPTION_AUTH_TAG_LENGTH = 16
15+
16+
17+
def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict:
    """Encrypt a string with an AES cipher and protect the cipher password with an RSA public key.

    A new random encryption key and initialization vector are generated on every call.
    Their concatenation (the "password") is encrypted with the public key using OAEP padding,
    while the value itself is encrypted with AES in GCM mode. Both results are returned BASE64-encoded.

    Args:
        value (str): Plain-text value to encrypt.
        public_key (RSAPublicKey): Public key used to encrypt the password.

    Returns:
        dict: BASE64 strings stored under the `encrypted_value` and `encrypted_password` keys.
    """
    aes_key = _crypto_random_object_id(ENCRYPTION_KEY_LENGTH).encode('utf-8')
    aes_iv = _crypto_random_object_id(ENCRYPTION_IV_LENGTH).encode('utf-8')
    plaintext = value.encode('utf-8')

    gcm_mode = modes.GCM(aes_iv, min_tag_length=ENCRYPTION_AUTH_TAG_LENGTH)
    encryptor = Cipher(algorithms.AES(aes_key), gcm_mode).encryptor()
    ciphertext = encryptor.update(plaintext) + encryptor.finalize()

    # The password is the key followed by the initialization vector; only the private key holder can recover it.
    oaep_padding = padding.OAEP(
        mgf=padding.MGF1(algorithm=hashes.SHA1()),
        algorithm=hashes.SHA1(),
        label=None,
    )
    wrapped_password = public_key.encrypt(aes_key + aes_iv, oaep_padding)

    # NOTE: The auth tag is appended to the end of the encrypted data so the receiver can verify its integrity.
    sealed_value = ciphertext + encryptor.tag

    return {
        'encrypted_value': base64.b64encode(sealed_value).decode('utf-8'),
        'encrypted_password': base64.b64encode(wrapped_password).decode('utf-8'),
    }
52+
53+
54+
def private_decrypt(
    encrypted_password: str,
    encrypted_value: str,
    *,
    private_key: rsa.RSAPrivateKey,
) -> str:
    """Decrypt an encrypted value using the private key and the encrypted password.

    Args:
        encrypted_password (str): BASE64-encoded password (encryption key followed by the
            initialization vector) that was encrypted with the matching public key.
        encrypted_value (str): BASE64-encoded encrypted data with the authentication tag appended.
        private_key (RSAPrivateKey): Private key used to decrypt the password.

    Returns:
        str: Decrypted value.

    Raises:
        ValueError: If the decrypted password has an unexpected length, if the encrypted value is
            too short to contain an authentication tag, or if the authentication tag does not match.
    """
    encrypted_password_bytes = base64.b64decode(encrypted_password.encode('utf-8'))
    encrypted_value_bytes = base64.b64decode(encrypted_value.encode('utf-8'))

    # Decrypt the password
    password_bytes = private_key.decrypt(
        encrypted_password_bytes,
        padding.OAEP(
            mgf=padding.MGF1(algorithm=hashes.SHA1()),
            algorithm=hashes.SHA1(),
            label=None,
        ),
    )

    if len(password_bytes) != ENCRYPTION_KEY_LENGTH + ENCRYPTION_IV_LENGTH:
        raise ValueError('Decryption failed, invalid password length!')

    # Without this guard a too-short payload would be sliced into a bogus data prefix and a truncated tag.
    if len(encrypted_value_bytes) < ENCRYPTION_AUTH_TAG_LENGTH:
        raise ValueError('Decryption failed, malformed encrypted value or password.')

    # Slice the encrypted value into the cipher text and the authentication tag
    encrypted_data_bytes = encrypted_value_bytes[:-ENCRYPTION_AUTH_TAG_LENGTH]
    authentication_tag_bytes = encrypted_value_bytes[-ENCRYPTION_AUTH_TAG_LENGTH:]

    # Split the password into the encryption key and the initialization vector
    encryption_key_bytes = password_bytes[:ENCRYPTION_KEY_LENGTH]
    initialization_vector_bytes = password_bytes[ENCRYPTION_KEY_LENGTH:]

    cipher = Cipher(algorithms.AES(encryption_key_bytes), modes.GCM(initialization_vector_bytes, authentication_tag_bytes))
    decryptor = cipher.decryptor()
    try:
        decipher_bytes = decryptor.update(encrypted_data_bytes) + decryptor.finalize()
    except InvalidTagException as err:
        # Chain the original error so the root cause stays visible in tracebacks.
        raise ValueError('Decryption failed, malformed encrypted value or password.') from err

    return decipher_bytes.decode('utf-8')
102+
103+
104+
def _load_private_key(private_key_file_base64: str, private_key_password: str) -> rsa.RSAPrivateKey:
    """Load a password-protected RSA private key from a BASE64-encoded PEM file.

    Args:
        private_key_file_base64 (str): PEM private key file content encoded as a BASE64 string.
        private_key_password (str): Password protecting the private key.

    Returns:
        RSAPrivateKey: The loaded private key.

    Raises:
        ValueError: If the loaded key is not an RSA private key.
    """
    pem_bytes = base64.b64decode(private_key_file_base64.encode('utf-8'))
    password_bytes = private_key_password.encode('utf-8')

    loaded_key = serialization.load_pem_private_key(pem_bytes, password=password_bytes)
    if isinstance(loaded_key, rsa.RSAPrivateKey):
        return loaded_key

    raise ValueError('Invalid private key.')
111+
112+
113+
def _load_public_key(public_key_file_base64: str) -> rsa.RSAPublicKey:
    """Load an RSA public key from a BASE64-encoded PEM file.

    Args:
        public_key_file_base64 (str): PEM public key file content encoded as a BASE64 string.

    Returns:
        RSAPublicKey: The loaded public key.

    Raises:
        ValueError: If the loaded key is not an RSA public key.
    """
    pem_bytes = base64.b64decode(public_key_file_base64.encode('utf-8'))

    loaded_key = serialization.load_pem_public_key(pem_bytes)
    if isinstance(loaded_key, rsa.RSAPublicKey):
        return loaded_key

    raise ValueError('Invalid public key.')
119+
120+
121+
def _crypto_random_object_id(length: int = 17) -> str:
122+
"""Python reimplementation of cryptoRandomObjectId from `@apify/utilities`."""
123+
chars = 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'
124+
return ''.join(secrets.choice(chars) for _ in range(length))
125+
126+
127+
def _decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input: Any) -> Any:
    """Decrypt the encrypted top-level string fields of an actor input.

    Only dictionaries are processed; any other input is returned untouched. String values that
    fully match `ENCRYPTED_INPUT_VALUE_REGEXP` are replaced in place with their decrypted form.

    Args:
        private_key (RSAPrivateKey): Private key used for the decryption.
        input (Any): The actor input.

    Returns:
        Any: The same input object, with the encrypted fields decrypted.
    """
    if isinstance(input, dict):
        for field_name, field_value in input.items():
            if not isinstance(field_value, str):
                continue

            parsed = ENCRYPTED_INPUT_VALUE_REGEXP.fullmatch(field_value)
            if parsed is None:
                continue

            # Only values of existing keys are reassigned, so the dictionary size stays stable during iteration.
            password_part, value_part = parsed.group(1, 2)
            input[field_name] = private_decrypt(
                password_part,
                value_part,
                private_key=private_key,
            )

    return input

src/apify/_utils.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import mimetypes
1111
import os
1212
import re
13-
import secrets
1413
import sys
1514
import time
1615
from collections import OrderedDict
@@ -324,12 +323,6 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
324323
return cast(MetadataType, wrapper)
325324

326325

327-
def _crypto_random_object_id(length: int = 17) -> str:
328-
"""Python reimplementation of cryptoRandomObjectId from `@apify/utilities`."""
329-
chars = 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'
330-
return ''.join(secrets.choice(chars) for _ in range(length))
331-
332-
333326
T = TypeVar('T')
334327

335328

src/apify/actor.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from apify_client import ApifyClientAsync
1111
from apify_client.consts import WebhookEventType
1212

13+
from ._crypto import _decrypt_input_secrets, _load_private_key
1314
from ._utils import (
1415
_fetch_and_parse_env_var,
1516
_get_cpu_usage_percent,
@@ -584,9 +585,17 @@ async def get_input(cls) -> Any:
584585
async def _get_input_internal(self) -> Any:
    """Return the actor input, with input secrets decrypted when the key material is configured.

    The input is read from the record stored under the configured input key. Decryption is attempted
    only when both the private key file and its passphrase are set in the configuration.
    """
    self._raise_if_not_initialized()

    raw_input = await self.get_value(self._config.input_key)

    key_file = self._config.input_secrets_private_key_file
    key_passphrase = self._config.input_secrets_private_key_passphrase
    if not (key_file and key_passphrase):
        # Without the complete key material the input is returned as stored.
        return raw_input

    loaded_key = _load_private_key(key_file, key_passphrase)
    return _decrypt_input_secrets(loaded_key, raw_input)
590599

591600
@classmethod
592601
async def get_value(cls, key: str) -> Any:

src/apify/consts.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
from enum import Enum
23
from typing import List, Literal, get_args
34

@@ -159,3 +160,7 @@ class StorageTypes(str, Enum):
159160
REQUEST_QUEUE_HEAD_MAX_LIMIT = 1000
160161

161162
EVENT_LISTENERS_TIMEOUT_SECS = 5
163+
164+
BASE64_REGEXP = '[-A-Za-z0-9+/]*={0,3}'
165+
ENCRYPTED_INPUT_VALUE_PREFIX = 'ENCRYPTED_VALUE'
166+
ENCRYPTED_INPUT_VALUE_REGEXP = re.compile(f'^{ENCRYPTED_INPUT_VALUE_PREFIX}:({BASE64_REGEXP}):({BASE64_REGEXP})$')

src/apify/storages/request_queue.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
from apify_client import ApifyClientAsync
1111
from apify_client.clients import RequestQueueClientAsync
1212

13-
from .._utils import LRUCache, _budget_ow, _crypto_random_object_id, _unique_key_to_request_id
13+
from .._crypto import _crypto_random_object_id
14+
from .._utils import LRUCache, _budget_ow, _unique_key_to_request_id
1415
from ..config import Configuration
1516
from ..consts import REQUEST_QUEUE_HEAD_MAX_LIMIT
1617
from ..memory_storage import MemoryStorage

tests/integration/_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from apify._utils import _crypto_random_object_id
1+
from apify._crypto import _crypto_random_object_id
22

33

44
def generate_unique_resource_name(label: str) -> str:

tests/integration/test_actor_api_helpers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import json
33

44
from apify import Actor
5-
from apify._utils import _crypto_random_object_id
5+
from apify._crypto import _crypto_random_object_id
66
from apify_client import ApifyClientAsync
77

88
from ._utils import generate_unique_resource_name

tests/integration/test_fixtures.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from datetime import datetime, timezone
22

33
from apify import Actor
4-
from apify._utils import _crypto_random_object_id
4+
from apify._crypto import _crypto_random_object_id
55
from apify_client import ApifyClientAsync
66

77
from .conftest import ActorFactory

tests/unit/actor/test_actor_key_value_store.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
import pytest
22

33
from apify import Actor
4+
from apify._crypto import public_encrypt
45
from apify._utils import _json_dumps
6+
from apify.consts import ENCRYPTED_INPUT_VALUE_PREFIX, ApifyEnvVars
57
from apify.memory_storage import MemoryStorage
68

9+
from ..test_crypto import PRIVATE_KEY_PASSWORD, PRIVATE_KEY_PEM_BASE64, PUBLIC_KEY
10+
711

812
# NOTE: We only test the key-value store methods available on the Actor class/instance. Actual tests for the implementations are in storages/.
913
class TestOpenKeyValueStore:
@@ -42,3 +46,24 @@ async def test_get_input(self, memory_storage: MemoryStorage) -> None:
4246
async with Actor() as my_actor:
4347
input = await my_actor.get_input()
4448
assert input['foo'] == test_input['foo']
49+
50+
async def test_get_input_with_secrets(self, memory_storage: MemoryStorage, monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that an encrypted input field comes back decrypted from `get_input`."""
    # Expose the private key and its passphrase through the environment variables.
    monkeypatch.setenv(ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_FILE, PRIVATE_KEY_PEM_BASE64)
    monkeypatch.setenv(ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE, PRIVATE_KEY_PASSWORD)

    input_key = 'INPUT'
    secret_string = 'secret-string'

    # Build an input holding one plain field and one field encrypted with the public key.
    encrypted_secret = public_encrypt(secret_string, public_key=PUBLIC_KEY)
    input_with_secret = {
        'foo': 'bar',
        'secret': f'{ENCRYPTED_INPUT_VALUE_PREFIX}:{encrypted_secret["encrypted_password"]}:{encrypted_secret["encrypted_value"]}',
    }

    # Store the input record in the default key-value store.
    default_store_info = await memory_storage.key_value_stores().get_or_create(name='default')
    default_store_client = memory_storage.key_value_store(default_store_info['id'])
    await default_store_client.set_record(
        key=input_key,
        value=_json_dumps(input_with_secret),
        content_type='application/json',
    )

    async with Actor() as my_actor:
        actor_input = await my_actor.get_input()
        assert actor_input['foo'] == input_with_secret['foo']
        assert actor_input['secret'] == secret_string

0 commit comments

Comments
 (0)