Skip to content

Commit 24f2d8c

Browse files
authored
Move Scrapy-related code from Actor template to SDK (#134)
1 parent e238be4 commit 24f2d8c

File tree

22 files changed

+465
-36
lines changed

22 files changed

+465
-36
lines changed

.flake8

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ filename =
66
per-file-ignores =
77
scripts/*: D
88
tests/*: D
9+
**/__init__.py: F401
910

1011
# Google docstring convention + D204 & D401
1112
docstring-convention = all

CHANGELOG.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
Changelog
22
=========
33

4-
[1.2.1](../../releases/tag/v1.2.1) - Unreleased
4+
[1.3.0](../../releases/tag/v1.3.0) - Unreleased
55
-----------------------------------------------
66

7-
...
7+
### Added
8+
9+
- Added `scrapy` extra
810

911
[1.2.0](../../releases/tag/v1.2.0) - 2023-10-23
1012
-----------------------------------------------

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ clean:
88

99
install-dev:
1010
python -m pip install --upgrade pip
11-
pip install --no-cache-dir -e ".[dev]"
11+
pip install --no-cache-dir -e ".[dev,scrapy]"
1212
pre-commit install
1313

1414
build:

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,22 @@ event handling.
77
If you just need to access the [Apify API](https://docs.apify.com/api/v2) from your Python applications,
88
check out the [Apify Client for Python](https://docs.apify.com/api/client/python) instead.
99

10+
## Installation
11+
12+
The Apify SDK for Python is available on PyPI as the `apify` package.
13+
For the default installation using pip, run the following:
14+
15+
```bash
16+
pip install apify
17+
```
18+
19+
For users interested in integrating Apify with Scrapy, we provide a package extra called `scrapy`.
20+
To install Apify with the `scrapy` extra, use the following command:
21+
22+
```bash
23+
pip install apify[scrapy]
24+
```
25+
1026
## Documentation
1127

1228
For usage instructions, check the documentation on [Apify Docs](https://docs.apify.com/sdk/python/).

mypy.ini

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,9 @@ warn_redundant_casts = True
1414
warn_return_any = True
1515
warn_unreachable = True
1616
warn_unused_ignores = True
17+
18+
[mypy-scrapy.*]
19+
ignore_missing_imports = True
20+
21+
[mypy-sortedcollections.*]
22+
ignore_missing_imports = True

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "apify"
3-
version = "1.2.1"
3+
version = "1.3.0"
44
description = "Apify SDK for Python"
55
readme = "README.md"
66
license = {text = "Apache Software License"}
@@ -72,6 +72,9 @@ dev = [
7272
"types-colorama ~= 0.4.15.11",
7373
"types-psutil ~= 5.9.5.12",
7474
]
75+
scrapy = [
76+
"scrapy ~= 2.11.0",
77+
]
7578

7679
[project.urls]
7780
"Homepage" = "https://docs.apify.com/sdk/python/"

src/apify/_crypto.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict:
3030
Returns:
3131
dict: Encrypted password and value.
3232
"""
33-
key_bytes = _crypto_random_object_id(ENCRYPTION_KEY_LENGTH).encode('utf-8')
34-
initialized_vector_bytes = _crypto_random_object_id(ENCRYPTION_IV_LENGTH).encode('utf-8')
33+
key_bytes = crypto_random_object_id(ENCRYPTION_KEY_LENGTH).encode('utf-8')
34+
initialized_vector_bytes = crypto_random_object_id(ENCRYPTION_IV_LENGTH).encode('utf-8')
3535
value_bytes = value.encode('utf-8')
3636

3737
password_bytes = key_bytes + initialized_vector_bytes
@@ -122,7 +122,7 @@ def _load_public_key(public_key_file_base64: str) -> rsa.RSAPublicKey:
122122
return public_key
123123

124124

125-
def _crypto_random_object_id(length: int = 17) -> str:
125+
def crypto_random_object_id(length: int = 17) -> str:
126126
"""Python reimplementation of cryptoRandomObjectId from `@apify/utilities`."""
127127
chars = 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'
128128
return ''.join(secrets.choice(chars) for _ in range(length))

src/apify/_memory_storage/resource_clients/dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from apify_shared.types import JSONSerializable
1111
from apify_shared.utils import ignore_docs
1212

13-
from ..._crypto import _crypto_random_object_id
13+
from ..._crypto import crypto_random_object_id
1414
from ..._utils import _force_rename, _raise_on_duplicate_storage, _raise_on_non_existing_storage
1515
from ...consts import _StorageTypes
1616
from ..file_storage_utils import _update_dataset_items, _update_metadata
@@ -52,7 +52,7 @@ def __init__(
5252
name: Optional[str] = None,
5353
) -> None:
5454
"""Initialize the DatasetClient."""
55-
self._id = id or _crypto_random_object_id()
55+
self._id = id or crypto_random_object_id()
5656
self._resource_directory = os.path.join(base_storage_directory, name or self._id)
5757
self._memory_storage_client = memory_storage_client
5858
self._name = name

src/apify/_memory_storage/resource_clients/key_value_store.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
from apify_shared.utils import ignore_docs, is_file_or_bytes, json_dumps
1717

18-
from ..._crypto import _crypto_random_object_id
18+
from ..._crypto import crypto_random_object_id
1919
from ..._utils import (
2020
_force_remove,
2121
_force_rename,
@@ -73,7 +73,7 @@ def __init__(
7373
name: Optional[str] = None,
7474
) -> None:
7575
"""Initialize the KeyValueStoreClient."""
76-
self._id = id or _crypto_random_object_id()
76+
self._id = id or crypto_random_object_id()
7777
self._resource_directory = os.path.join(base_storage_directory, name or self._id)
7878
self._memory_storage_client = memory_storage_client
7979
self._name = name

src/apify/_memory_storage/resource_clients/request_queue.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
from typing import TYPE_CHECKING, Dict, List, Optional
77

88
import aioshutil
9-
from sortedcollections import ValueSortedDict # type: ignore
9+
from sortedcollections import ValueSortedDict
1010

1111
from apify_shared.utils import filter_out_none_values_recursively, ignore_docs, json_dumps
1212

13-
from ..._crypto import _crypto_random_object_id
13+
from ..._crypto import crypto_random_object_id
1414
from ..._utils import _force_rename, _raise_on_duplicate_storage, _raise_on_non_existing_storage, _unique_key_to_request_id
1515
from ...consts import _StorageTypes
1616
from ..file_storage_utils import _delete_request, _update_metadata, _update_request_queue_item
@@ -46,7 +46,7 @@ def __init__(
4646
name: Optional[str] = None,
4747
) -> None:
4848
"""Initialize the RequestQueueClient."""
49-
self._id = id or _crypto_random_object_id()
49+
self._id = id or crypto_random_object_id()
5050
self._resource_directory = os.path.join(base_storage_directory, name or self._id)
5151
self._memory_storage_client = memory_storage_client
5252
self._name = name

0 commit comments

Comments
 (0)