From cc940235936aa8142a88065804dfcf3b2f11a88b Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Tue, 12 Nov 2024 14:10:58 +0100 Subject: [PATCH 01/20] Draft example of helper function to create RequestList --- src/apify/_actor.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index f60a99df..3d6255f1 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -6,6 +6,7 @@ from datetime import timedelta from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast +from crawlee.storages import RequestList from lazy_object_proxy import Proxy from pydantic import AliasChoices from typing_extensions import Self @@ -13,7 +14,7 @@ from apify_client import ApifyClientAsync from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value -from crawlee import service_container +from crawlee import service_container, Request from crawlee.events._types import Event, EventPersistStateData from apify._configuration import Configuration @@ -974,6 +975,20 @@ async def create_proxy_configuration( return proxy_configuration + @staticmethod + def create_request_list( + *, + actor_start_urls_input: dict + ) ->RequestList: + return RequestList(requests=[ + Request.from_url( + method=request_input.get("method"), + url=request_input.get("url"), + payload=request_input.get("payload", "").encode("utf-8"), + headers=request_input.get("headers", {}), + user_data=request_input.get("userData", {}), + ) for request_input in actor_start_urls_input]) + Actor = cast(_ActorType, Proxy(_ActorType)) """The entry point of the SDK, through which all the Actor operations should be done.""" From ded055d893b3349bfd658eeb9910298748958459 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Thu, 14 Nov 2024 14:41:29 +0100 Subject: [PATCH 02/20] Add test for simple input --- src/apify/_actor.py | 29 +++++++------- .../test_actor_create_proxy_configuration.py | 40 +++++++++++++++++++ 2 files changed, 55 insertions(+), 14 deletions(-) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 3d6255f1..06bc2fb8 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -6,7 +6,6 @@ from datetime import timedelta from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast -from crawlee.storages import RequestList from lazy_object_proxy import Proxy from pydantic import AliasChoices from typing_extensions import Self @@ -14,8 +13,9 @@ from apify_client import ApifyClientAsync from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value -from crawlee import service_container, Request +from crawlee import Request, service_container from crawlee.events._types import Event, EventPersistStateData +from crawlee.storages import RequestList from apify._configuration import Configuration from apify._consts import EVENT_LISTENERS_TIMEOUT @@ -976,18 +976,19 @@ async def create_proxy_configuration( return proxy_configuration @staticmethod - def create_request_list( - *, - actor_start_urls_input: dict - ) ->RequestList: - return RequestList(requests=[ - Request.from_url( - method=request_input.get("method"), - url=request_input.get("url"), - payload=request_input.get("payload", "").encode("utf-8"), - headers=request_input.get("headers", {}), - user_data=request_input.get("userData", {}), - ) for request_input in actor_start_urls_input]) + def create_request_list(*, 
actor_start_urls_input: dict) -> RequestList: + return RequestList( + requests=[ + Request.from_url( + method=request_input.get('method'), + url=request_input.get('url'), + payload=request_input.get('payload', '').encode('utf-8'), + headers=request_input.get('headers', {}), + user_data=request_input.get('userData', {}), + ) + for request_input in actor_start_urls_input + ] + ) Actor = cast(_ActorType, Proxy(_ActorType)) diff --git a/tests/unit/actor/test_actor_create_proxy_configuration.py b/tests/unit/actor/test_actor_create_proxy_configuration.py index e0c7cd57..b5dd293e 100644 --- a/tests/unit/actor/test_actor_create_proxy_configuration.py +++ b/tests/unit/actor/test_actor_create_proxy_configuration.py @@ -1,5 +1,6 @@ from __future__ import annotations +import typing from typing import TYPE_CHECKING import httpx @@ -7,6 +8,8 @@ from apify_client import ApifyClientAsync from apify_shared.consts import ApifyEnvVars +from crawlee._request import UserData +from crawlee._types import HttpHeaders, HttpMethod from apify import Actor @@ -141,3 +144,40 @@ async def test_proxy_configuration_with_actor_proxy_input( assert len(route.calls) == 2 await Actor.exit() + + +@pytest.mark.parametrize('request_method', typing.get_args(HttpMethod)) +@pytest.mark.parametrize( + 'optional_input', + [ + {}, + {'payload': 'some payload', 'userData': {'some key': 'some value'}, 'headers': {'h1': 'v1', 'h2': 'v2'}}, + ], + ids=['minimal', 'all_options'], +) +async def test_actor_create_request_list_request_types( + request_method: HttpMethod, optional_input: dict[str, str] +) -> None: + """Tests proper request list generation from both minimal and full inputs for all method types.""" + minimal_request_dict_input = {'url': 'https://www.abc.com', 'method': request_method} + request_dict_input = {**minimal_request_dict_input, **optional_input} + example_start_urls_input = [ + request_dict_input, + ] + + generated_request_list = Actor.create_request_list(actor_start_urls_input=example_start_urls_input) + + assert not await generated_request_list.is_empty() + generated_request = await generated_request_list.fetch_next_request() + assert await generated_request_list.is_empty() + + assert generated_request.method == request_dict_input['method'] + assert generated_request.url == request_dict_input['url'] + assert generated_request.payload == request_dict_input.get('payload', '').encode('utf-8') + expected_user_data = UserData() + if 'userData' in optional_input: + for key, value in optional_input['userData'].items(): + expected_user_data[key] = value + assert generated_request.user_data == expected_user_data + expected_headers = HttpHeaders(root=optional_input.get('headers', {})) + assert generated_request.headers == expected_headers From 57dd329001c123f815c9306fcbea78a683fe7bc9 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Fri, 15 Nov 2024 11:32:10 +0100 Subject: [PATCH 03/20] WIP --- src/apify/_actor.py | 43 +++++++++++++++--- .../test_actor_create_proxy_configuration.py | 45 ++++++++++++++++++- 2 files changed, 81 insertions(+), 7 deletions(-) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 06bc2fb8..06023400 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -2,10 +2,13 @@ import asyncio import os +import re import sys from datetime import timedelta +from itertools import chain from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast +from crawlee.http_clients import HttpxHttpClient, HttpResponse, BaseHttpClient from lazy_object_proxy import Proxy from pydantic import AliasChoices 
from typing_extensions import Self @@ -39,6 +42,10 @@ MainReturnType = TypeVar('MainReturnType') +URL_NO_COMMAS_REGEX = re.compile(r"https?:\/\/(www\.)?([a-zA-Z0-9]|[a-zA-Z0-9][-a-zA-Z0-9@:%._+~#=]{0,254}[a-zA-Z0-9])\.[a-z]{2,63}(:\d{1,5})?(\/[-a-zA-Z0-9@:%_+.~#?&/=()]*)?") +# JS version. TODO rewrite to Python regexp +# /https?:\/\/(www\.)?([\p{L}0-9]|[\p{L}0-9][-\p{L}0-9@:%._+~#=]{0,254}[\p{L}0-9])\.[a-z]{2,63}(:\d{1,5})?(\/[-\p{L}0-9@:%_+.~#?&/=()]*)?/giu; + class _ActorType: """The class of `Actor`. Only make a new instance if you're absolutely sure you need to.""" @@ -976,9 +983,18 @@ async def create_proxy_configuration( return proxy_configuration @staticmethod - def create_request_list(*, actor_start_urls_input: dict) -> RequestList: - return RequestList( - requests=[ + async def create_request_list(*, actor_start_urls_input: dict, http_client: BaseHttpClient = HttpxHttpClient()) -> RequestList: + simple_url_requests_inputs = [request_input for request_input in actor_start_urls_input if "url" in request_input] + remote_url_requests_inputs = [request_input for request_input in actor_start_urls_input if "requestsFromUrl" in request_input] + + simple_url_requests = Actor._create_requests_from_input(simple_url_requests_inputs) + remote_url_requests = await Actor._create_requests_from_url(remote_url_requests_inputs, http_client=http_client) + + return RequestList(requests=simple_url_requests + remote_url_requests) + + @staticmethod + def _create_requests_from_input(simple_url_requests_inputs: list[dict[str,str]]) -> list[Request]: + return [ Request.from_url( method=request_input.get('method'), url=request_input.get('url'), @@ -986,10 +1002,25 @@ def create_request_list(*, actor_start_urls_input: dict) -> RequestList: headers=request_input.get('headers', {}), user_data=request_input.get('userData', {}), ) - for request_input in actor_start_urls_input - ] - ) + for request_input in simple_url_requests_inputs] + @staticmethod + async def _create_requests_from_url(remote_url_requests_inputs: list[dict[str,str]], http_client: BaseHttpClient ) -> list[Request]: + remote_url_requests = [] + for input in remote_url_requests_inputs: + remote_url_requests.append(asyncio.create_task(http_client.send_request( + url=input["requestsFromUrl"], + headers=input.get("headers", {}), + payload=input.get("payload", "").encode('utf-8'), + ))) + await asyncio.gather(*remote_url_requests) + # TODO as callbacks + return list(chain.from_iterable((Actor.extract_requests_from_response(finished_request.result()) for finished_request in remote_url_requests))) + + @staticmethod + def extract_requests_from_response(response: HttpResponse) -> list[Request]: + matches = list(re.finditer(URL_NO_COMMAS_REGEX, response.read().decode('utf-8'))) + return [Request.from_url(match.group(0)) for match in matches] Actor = cast(_ActorType, Proxy(_ActorType)) """The entry point of the SDK, through which all the Actor operations should be done.""" diff --git a/tests/unit/actor/test_actor_create_proxy_configuration.py b/tests/unit/actor/test_actor_create_proxy_configuration.py index b5dd293e..93440996 100644 --- a/tests/unit/actor/test_actor_create_proxy_configuration.py +++ b/tests/unit/actor/test_actor_create_proxy_configuration.py @@ -2,6 +2,7 @@ import typing from typing import TYPE_CHECKING +from unittest import mock import httpx import pytest @@ -10,6 +11,7 @@ from apify_shared.consts import ApifyEnvVars from crawlee._request import UserData from crawlee._types import HttpHeaders, HttpMethod +from crawlee.http_clients import 
HttpxHttpClient, HttpResponse from apify import Actor @@ -165,7 +167,7 @@ async def test_actor_create_request_list_request_types( request_dict_input, ] - generated_request_list = Actor.create_request_list(actor_start_urls_input=example_start_urls_input) + generated_request_list =await Actor.create_request_list(actor_start_urls_input=example_start_urls_input) assert not await generated_request_list.is_empty() generated_request = await generated_request_list.fetch_next_request() @@ -181,3 +183,44 @@ async def test_actor_create_request_list_request_types( assert generated_request.user_data == expected_user_data expected_headers = HttpHeaders(root=optional_input.get('headers', {})) assert generated_request.headers == expected_headers + + +async def test_actor_create_request_list_from_url(): + expected_urls = {"http://www.something.com", "https://www.something_else.com", "http://www.bla.net"} + response_body = "blablabla{} more blablabla{} ,\n even more blablbablba.{}".format(*expected_urls) + mocked_http_client = HttpxHttpClient() + class DummyResponse(HttpResponse): + @property + def http_version(self) -> str: + """The HTTP version used in the response.""" + return "" + + @property + def status_code(self) -> int: + """The HTTP status code received from the server.""" + return 200 + + @property + def headers(self) -> HttpHeaders: + """The HTTP headers received in the response.""" + return HttpHeaders() + + def read(self) -> bytes: + return response_body.encode('utf-8') + + + async def mocked_send_request(*args, **kwargs): + return DummyResponse() + with mock.patch.object(mocked_http_client, "send_request", mocked_send_request) as mocked_send_request2: + + example_start_urls_input = [ + {"requestsFromUrl": "https://crawlee.dev/file.txt", 'method': "GET"} + ] + + + generated_request_list =await Actor.create_request_list(actor_start_urls_input=example_start_urls_input, http_client=mocked_http_client) + generated_requests = [] + while request:= await generated_request_list.fetch_next_request(): + generated_requests.append(request) + + assert set(generated_request.url for generated_request in generated_requests) == expected_urls From 0a465be93c4ca5d020593529c7e2c3ea1531d5ff Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Fri, 15 Nov 2024 14:19:59 +0100 Subject: [PATCH 04/20] WIP Finalize tests. Split to its own file. 
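
For context, a minimal usage sketch of the helper finalized in this patch (not part of the diff): the 'startUrls' input field name and the surrounding boilerplate are assumptions for illustration; only Actor.create_request_list and its actor_start_urls_input argument come from this change.

    from apify import Actor

    async def main() -> None:
        async with Actor:
            actor_input = await Actor.get_input() or {}
            # 'startUrls' is an assumed input field name for this sketch; the helper
            # accepts both plain {'url': ...} and {'requestsFromUrl': ...} entries.
            request_list = await Actor.create_request_list(
                actor_start_urls_input=actor_input.get('startUrls', []),
            )
            while request := await request_list.fetch_next_request():
                Actor.log.info(f'Fetched request for {request.url}')
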
--- src/apify/_actor.py | 58 ++++-------- src/apify/_actor_inputs.py | 73 +++++++++++++++ .../test_actor_create_proxy_configuration.py | 90 ++++++++++++++----- 3 files changed, 158 insertions(+), 63 deletions(-) create mode 100644 src/apify/_actor_inputs.py diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 06023400..a30e189e 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -8,7 +8,6 @@ from itertools import chain from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast -from crawlee.http_clients import HttpxHttpClient, HttpResponse, BaseHttpClient from lazy_object_proxy import Proxy from pydantic import AliasChoices from typing_extensions import Self @@ -18,8 +17,10 @@ from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value from crawlee import Request, service_container from crawlee.events._types import Event, EventPersistStateData +from crawlee.http_clients import BaseHttpClient, HttpResponse, HttpxHttpClient from crawlee.storages import RequestList +from apify._actor_inputs import _create_request_list from apify._configuration import Configuration from apify._consts import EVENT_LISTENERS_TIMEOUT from apify._crypto import decrypt_input_secrets, load_private_key @@ -42,9 +43,6 @@ MainReturnType = TypeVar('MainReturnType') -URL_NO_COMMAS_REGEX = re.compile(r"https?:\/\/(www\.)?([a-zA-Z0-9]|[a-zA-Z0-9][-a-zA-Z0-9@:%._+~#=]{0,254}[a-zA-Z0-9])\.[a-z]{2,63}(:\d{1,5})?(\/[-a-zA-Z0-9@:%_+.~#?&/=()]*)?") -# JS version. TODO rewrite to Python regexp -# /https?:\/\/(www\.)?([\p{L}0-9]|[\p{L}0-9][-\p{L}0-9@:%._+~#=]{0,254}[\p{L}0-9])\.[a-z]{2,63}(:\d{1,5})?(\/[-\p{L}0-9@:%_+.~#?&/=()]*)?/giu; class _ActorType: @@ -983,44 +981,20 @@ async def create_proxy_configuration( return proxy_configuration @staticmethod - async def create_request_list(*, actor_start_urls_input: dict, http_client: BaseHttpClient = HttpxHttpClient()) -> RequestList: - simple_url_requests_inputs = [request_input for request_input in actor_start_urls_input if "url" in request_input] - remote_url_requests_inputs = [request_input for request_input in actor_start_urls_input if "requestsFromUrl" in request_input] - - simple_url_requests = Actor._create_requests_from_input(simple_url_requests_inputs) - remote_url_requests = await Actor._create_requests_from_url(remote_url_requests_inputs, http_client=http_client) - - return RequestList(requests=simple_url_requests + remote_url_requests) - - @staticmethod - def _create_requests_from_input(simple_url_requests_inputs: list[dict[str,str]]) -> list[Request]: - return [ - Request.from_url( - method=request_input.get('method'), - url=request_input.get('url'), - payload=request_input.get('payload', '').encode('utf-8'), - headers=request_input.get('headers', {}), - user_data=request_input.get('userData', {}), - ) - for request_input in simple_url_requests_inputs] - - @staticmethod - async def _create_requests_from_url(remote_url_requests_inputs: list[dict[str,str]], http_client: BaseHttpClient ) -> list[Request]: - remote_url_requests = [] - for input in remote_url_requests_inputs: - remote_url_requests.append(asyncio.create_task(http_client.send_request( - url=input["requestsFromUrl"], - headers=input.get("headers", {}), - payload=input.get("payload", "").encode('utf-8'), - ))) - await asyncio.gather(*remote_url_requests) - # TODO as callbacks - return list(chain.from_iterable((Actor.extract_requests_from_response(finished_request.result()) for finished_request in remote_url_requests))) - - @staticmethod - def 
extract_requests_from_response(response: HttpResponse) -> list[Request]: - matches = list(re.finditer(URL_NO_COMMAS_REGEX, response.read().decode('utf-8'))) - return [Request.from_url(match.group(0)) for match in matches] + async def create_request_list( + *, actor_start_urls_input: list[dict[str,str]], http_client: BaseHttpClient | None= None + ) -> RequestList: + """Creates request list from Actor input requestListSources. This accepts list of urls and requestsFromUrl. + + Example: + actor_start_urls_input = [ + # Gather urls from response body. + {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, + # Directly include this url. + {'url': 'https://crawlee.dev', 'method': 'GET'} + ] + """ + return await _create_request_list(actor_start_urls_input=actor_start_urls_input, http_client=http_client) Actor = cast(_ActorType, Proxy(_ActorType)) """The entry point of the SDK, through which all the Actor operations should be done.""" diff --git a/src/apify/_actor_inputs.py b/src/apify/_actor_inputs.py new file mode 100644 index 00000000..bf7d6fca --- /dev/null +++ b/src/apify/_actor_inputs.py @@ -0,0 +1,73 @@ +import asyncio +from itertools import chain +import re + +from crawlee import Request +from crawlee.http_clients import BaseHttpClient, HttpxHttpClient, HttpResponse +from crawlee.storages import RequestList + +URL_NO_COMMAS_REGEX = re.compile( + r'https?:\/\/(www\.)?([a-zA-Z0-9]|[a-zA-Z0-9][-a-zA-Z0-9@:%._+~#=]{0,254}[a-zA-Z0-9])\.[a-z]{2,63}(:\d{1,5})?(\/[-a-zA-Z0-9@:%_+.~#?&/=()]*)?' +) + +@staticmethod +async def _create_request_list( + *, actor_start_urls_input: dict, http_client: BaseHttpClient | None = None +) -> RequestList: + if not http_client: + http_client = HttpxHttpClient() + simple_url_requests_inputs = [ + request_input for request_input in actor_start_urls_input if 'url' in request_input + ] + remote_url_requests_inputs = [ + request_input for request_input in actor_start_urls_input if 'requestsFromUrl' in request_input + ] + + simple_url_requests = _create_requests_from_input(simple_url_requests_inputs) + remote_url_requests = await _create_requests_from_url(remote_url_requests_inputs, http_client=http_client) + + return RequestList(requests=simple_url_requests + remote_url_requests) + + +@staticmethod +def _create_requests_from_input(simple_url_requests_inputs: list[dict[str, str]]) -> list[Request]: + return [ + Request.from_url( + method=request_input.get('method'), + url=request_input.get('url'), + payload=request_input.get('payload', '').encode('utf-8'), + headers=request_input.get('headers', {}), + user_data=request_input.get('userData', {}), + ) + for request_input in simple_url_requests_inputs + ] + + +@staticmethod +async def _create_requests_from_url( + remote_url_requests_inputs: list[dict[str, str]], http_client: BaseHttpClient +) -> list[Request]: + remote_url_requests = [] + for request_input in remote_url_requests_inputs: + remote_url_requests.append( + asyncio.create_task( + http_client.send_request( + method=request_input['method'], + url=request_input['requestsFromUrl'], + headers=request_input.get('headers', {}), + payload=request_input.get('payload', '').encode('utf-8'), + ) + ) + ) + await asyncio.gather(*remote_url_requests) + # TODO as callbacks + a = list( + extract_requests_from_response(finished_request.result()) for finished_request in remote_url_requests + ) + return list(chain.from_iterable(a)) + + +@staticmethod +def extract_requests_from_response(response: HttpResponse) -> list[Request]: + matches = 
list(re.finditer(URL_NO_COMMAS_REGEX, response.read().decode('utf-8'))) + return [Request.from_url(match.group(0)) for match in matches] diff --git a/tests/unit/actor/test_actor_create_proxy_configuration.py b/tests/unit/actor/test_actor_create_proxy_configuration.py index 93440996..0ed45f52 100644 --- a/tests/unit/actor/test_actor_create_proxy_configuration.py +++ b/tests/unit/actor/test_actor_create_proxy_configuration.py @@ -3,6 +3,7 @@ import typing from typing import TYPE_CHECKING from unittest import mock +from unittest.mock import call import httpx import pytest @@ -11,7 +12,7 @@ from apify_shared.consts import ApifyEnvVars from crawlee._request import UserData from crawlee._types import HttpHeaders, HttpMethod -from crawlee.http_clients import HttpxHttpClient, HttpResponse +from crawlee.http_clients import HttpResponse, HttpxHttpClient from apify import Actor @@ -160,14 +161,14 @@ async def test_proxy_configuration_with_actor_proxy_input( async def test_actor_create_request_list_request_types( request_method: HttpMethod, optional_input: dict[str, str] ) -> None: - """Tests proper request list generation from both minimal and full inputs for all method types.""" + """Test proper request list generation from both minimal and full inputs for all method types for simple input.""" minimal_request_dict_input = {'url': 'https://www.abc.com', 'method': request_method} request_dict_input = {**minimal_request_dict_input, **optional_input} example_start_urls_input = [ request_dict_input, ] - generated_request_list =await Actor.create_request_list(actor_start_urls_input=example_start_urls_input) + generated_request_list = await Actor.create_request_list(actor_start_urls_input=example_start_urls_input) assert not await generated_request_list.is_empty() generated_request = await generated_request_list.fetch_next_request() @@ -185,42 +186,89 @@ async def test_actor_create_request_list_request_types( assert generated_request.headers == expected_headers -async def test_actor_create_request_list_from_url(): - expected_urls = {"http://www.something.com", "https://www.something_else.com", "http://www.bla.net"} - response_body = "blablabla{} more blablabla{} ,\n even more blablbablba.{}".format(*expected_urls) - mocked_http_client = HttpxHttpClient() +def _create_dummy_response(read_output: typing.Iterable[str]) -> HttpResponse: + """Create dummy_response that will iterate through read_output when called like dummy_response.read()""" + class DummyResponse(HttpResponse): @property def http_version(self) -> str: - """The HTTP version used in the response.""" - return "" + return '' @property def status_code(self) -> int: - """The HTTP status code received from the server.""" return 200 @property def headers(self) -> HttpHeaders: - """The HTTP headers received in the response.""" return HttpHeaders() def read(self) -> bytes: - return response_body.encode('utf-8') + return next(read_output).encode('utf-8') + return DummyResponse() - async def mocked_send_request(*args, **kwargs): - return DummyResponse() - with mock.patch.object(mocked_http_client, "send_request", mocked_send_request) as mocked_send_request2: - example_start_urls_input = [ - {"requestsFromUrl": "https://crawlee.dev/file.txt", 'method': "GET"} - ] +async def test_actor_create_request_list_from_url_correctly_send_requests() -> None: + """Test that injected HttpClient's method send_request is called with properly passed arguments.""" + example_start_urls_input = [ + {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, + 
{'requestsFromUrl': 'https://www.crawlee.dev/file2', 'method': 'PUT'}, + { + 'requestsFromUrl': 'https://www.something.som', + 'method': 'POST', + 'headers': {'key': 'value'}, + 'payload': 'some_payload', + 'userData': 'irrelevant', + }, + ] + mocked_read_outputs = ('' for url in example_start_urls_input) + http_client = HttpxHttpClient() + with mock.patch.object( + http_client, 'send_request', return_value=_create_dummy_response(mocked_read_outputs) + ) as mocked_send_request: + await Actor.create_request_list(actor_start_urls_input=example_start_urls_input, http_client=http_client) + + expected_calls = [ + call( + method=example_input['method'], + url=example_input['requestsFromUrl'], + headers=example_input.get('headers', {}), + payload=example_input.get('payload', '').encode('utf-8'), + ) + for example_input in example_start_urls_input + ] + mocked_send_request.assert_has_calls(expected_calls) + + +async def test_actor_create_request_list_from_url() -> None: + """Test that create_request_list is correctly reading urls from remote url sources and also from simple input.""" + expected_simple_url = 'https://www.someurl.com' + expected_remote_urls_1 = {'http://www.something.com', 'https://www.somethingelse.com', 'http://www.bla.net'} + expected_remote_urls_2 = {'http://www.ok.com', 'https://www.true-positive.com'} + expected_urls = expected_remote_urls_1 | expected_remote_urls_2 | {expected_simple_url} + response_bodies = iter( + ( + 'blablabla{} more blablabla{} , even more blablabla. {} '.format(*expected_remote_urls_1), + 'some stuff{} more stuff{} www.falsepositive www.false_positive.com'.format(*expected_remote_urls_2), + ) + ) - generated_request_list =await Actor.create_request_list(actor_start_urls_input=example_start_urls_input, http_client=mocked_http_client) + example_start_urls_input = [ + {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, + {'url': expected_simple_url, 'method': 'GET'}, + {'requestsFromUrl': 'https://www.crawlee.dev/file2', 'method': 'GET'}, + ] + + http_client = HttpxHttpClient() + with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): + generated_request_list = await Actor.create_request_list( + actor_start_urls_input=example_start_urls_input, http_client=http_client + ) generated_requests = [] - while request:= await generated_request_list.fetch_next_request(): + while request := await generated_request_list.fetch_next_request(): + print(request) generated_requests.append(request) - assert set(generated_request.url for generated_request in generated_requests) == expected_urls + # Check correctly created requests' urls in request list + assert {generated_request.url for generated_request in generated_requests} == expected_urls From 4b167374bd4c5bc6f8bf71a8c563d9511f835a8f Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Fri, 15 Nov 2024 15:43:08 +0100 Subject: [PATCH 05/20] Use Pydantic to handle raw inputs Fix typing issues WIP --- pyproject.toml | 3 + src/apify/_actor.py | 18 +-- src/apify/_actor_inputs.py | 83 ++++++----- src/apify/_platform_event_manager.py | 2 +- .../test_actor_create_proxy_configuration.py | 131 ----------------- .../actor/test_actor_create_request_list.py | 139 ++++++++++++++++++ 6 files changed, 201 insertions(+), 175 deletions(-) create mode 100644 tests/unit/actor/test_actor_create_request_list.py diff --git a/pyproject.toml b/pyproject.toml index 0d41756e..c3a01c41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -141,6 +141,9 @@ indent-style = "space" 
docstring-quotes = "double" inline-quotes = "single" +[tool.ruff.lint.flake8-type-checking] +runtime-evaluated-base-classes = ["pydantic.BaseModel"] + [tool.ruff.lint.flake8-builtins] builtins-ignorelist = ["id"] diff --git a/src/apify/_actor.py b/src/apify/_actor.py index a30e189e..872416a0 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -2,10 +2,8 @@ import asyncio import os -import re import sys from datetime import timedelta -from itertools import chain from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast from lazy_object_proxy import Proxy @@ -15,10 +13,8 @@ from apify_client import ApifyClientAsync from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value -from crawlee import Request, service_container +from crawlee import service_container from crawlee.events._types import Event, EventPersistStateData -from crawlee.http_clients import BaseHttpClient, HttpResponse, HttpxHttpClient -from crawlee.storages import RequestList from apify._actor_inputs import _create_request_list from apify._configuration import Configuration @@ -36,7 +32,9 @@ import logging from types import TracebackType + from crawlee.http_clients import BaseHttpClient from crawlee.proxy_configuration import _NewUrlFunction + from crawlee.storages import RequestList from apify._models import Webhook @@ -44,7 +42,6 @@ MainReturnType = TypeVar('MainReturnType') - class _ActorType: """The class of `Actor`. Only make a new instance if you're absolutely sure you need to.""" @@ -982,19 +979,20 @@ async def create_proxy_configuration( @staticmethod async def create_request_list( - *, actor_start_urls_input: list[dict[str,str]], http_client: BaseHttpClient | None= None + *, actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None ) -> RequestList: - """Creates request list from Actor input requestListSources. This accepts list of urls and requestsFromUrl. + """Creates request list from Actor input requestListSources. This accepts list of urls and requests_from_url. Example: actor_start_urls_input = [ # Gather urls from response body. - {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, + {'requests_from_url': 'https://crawlee.dev/file.txt', 'method': 'GET'}, # Directly include this url. {'url': 'https://crawlee.dev', 'method': 'GET'} ] - """ + """ return await _create_request_list(actor_start_urls_input=actor_start_urls_input, http_client=http_client) + Actor = cast(_ActorType, Proxy(_ActorType)) """The entry point of the SDK, through which all the Actor operations should be done.""" diff --git a/src/apify/_actor_inputs.py b/src/apify/_actor_inputs.py index bf7d6fca..921064aa 100644 --- a/src/apify/_actor_inputs.py +++ b/src/apify/_actor_inputs.py @@ -1,26 +1,46 @@ +from __future__ import annotations + import asyncio -from itertools import chain import re +from asyncio import Task +from typing import Any + +from pydantic import BaseModel, Field from crawlee import Request -from crawlee.http_clients import BaseHttpClient, HttpxHttpClient, HttpResponse +from crawlee._types import HttpMethod # TODO: Make public in Crawlee? +from crawlee.http_clients import BaseHttpClient, HttpxHttpClient from crawlee.storages import RequestList URL_NO_COMMAS_REGEX = re.compile( r'https?:\/\/(www\.)?([a-zA-Z0-9]|[a-zA-Z0-9][-a-zA-Z0-9@:%._+~#=]{0,254}[a-zA-Z0-9])\.[a-z]{2,63}(:\d{1,5})?(\/[-a-zA-Z0-9@:%_+.~#?&/=()]*)?' 
) +class _RequestDetails(BaseModel): + method: HttpMethod + payload: str = '' + headers: dict[str, str] = Field(default_factory=dict) + user_data: dict[str, str]= Field(default_factory=dict, alias='user_data') + +class _RequestsFromUrlInput(_RequestDetails): + requests_from_url: str = Field(alias='requests_from_url') + +class _SimpleUrlInput(_RequestDetails): + url: str + + @staticmethod async def _create_request_list( - *, actor_start_urls_input: dict, http_client: BaseHttpClient | None = None + *, actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None ) -> RequestList: if not http_client: http_client = HttpxHttpClient() simple_url_requests_inputs = [ - request_input for request_input in actor_start_urls_input if 'url' in request_input - ] + _SimpleUrlInput(**request_input) for request_input in actor_start_urls_input + if 'url' in request_input] remote_url_requests_inputs = [ - request_input for request_input in actor_start_urls_input if 'requestsFromUrl' in request_input + _RequestsFromUrlInput(**request_input) for request_input in actor_start_urls_input + if 'requests_from_url' in request_input ] simple_url_requests = _create_requests_from_input(simple_url_requests_inputs) @@ -30,44 +50,41 @@ async def _create_request_list( @staticmethod -def _create_requests_from_input(simple_url_requests_inputs: list[dict[str, str]]) -> list[Request]: +def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> list[Request]: return [ Request.from_url( - method=request_input.get('method'), - url=request_input.get('url'), - payload=request_input.get('payload', '').encode('utf-8'), - headers=request_input.get('headers', {}), - user_data=request_input.get('userData', {}), + method=request_input.method, + url=request_input.url, + payload=request_input.payload.encode('utf-8'), + headers=request_input.headers, + user_data=request_input.user_data, ) - for request_input in simple_url_requests_inputs + for request_input in simple_url_inputs ] @staticmethod async def _create_requests_from_url( - remote_url_requests_inputs: list[dict[str, str]], http_client: BaseHttpClient + remote_url_requests_inputs: list[_RequestsFromUrlInput], http_client: BaseHttpClient ) -> list[Request]: + created_requests: list[Request] = [] + + def extract_requests_from_response(task: Task) -> list[Request]: + matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8')) + created_requests.extend([Request.from_url(match.group(0)) for match in matches]) + remote_url_requests = [] for request_input in remote_url_requests_inputs: - remote_url_requests.append( - asyncio.create_task( - http_client.send_request( - method=request_input['method'], - url=request_input['requestsFromUrl'], - headers=request_input.get('headers', {}), - payload=request_input.get('payload', '').encode('utf-8'), - ) + task = asyncio.create_task( + http_client.send_request( + method=request_input.method, + url=request_input.requests_from_url, + headers=request_input.headers, + payload=request_input.payload.encode('utf-8'), ) ) - await asyncio.gather(*remote_url_requests) - # TODO as callbacks - a = list( - extract_requests_from_response(finished_request.result()) for finished_request in remote_url_requests - ) - return list(chain.from_iterable(a)) + task.add_done_callback(extract_requests_from_response) + remote_url_requests.append(task) - -@staticmethod -def extract_requests_from_response(response: HttpResponse) -> list[Request]: - matches = list(re.finditer(URL_NO_COMMAS_REGEX, 
response.read().decode('utf-8'))) - return [Request.from_url(match.group(0)) for match in matches] + await asyncio.gather(*remote_url_requests) + return created_requests diff --git a/src/apify/_platform_event_manager.py b/src/apify/_platform_event_manager.py index afbf2c5d..348438bf 100644 --- a/src/apify/_platform_event_manager.py +++ b/src/apify/_platform_event_manager.py @@ -1,7 +1,7 @@ from __future__ import annotations import asyncio -from datetime import datetime # noqa: TCH003 +from datetime import datetime from typing import TYPE_CHECKING, Annotated, Any, Literal, Union import websockets.client diff --git a/tests/unit/actor/test_actor_create_proxy_configuration.py b/tests/unit/actor/test_actor_create_proxy_configuration.py index 0ed45f52..e0c7cd57 100644 --- a/tests/unit/actor/test_actor_create_proxy_configuration.py +++ b/tests/unit/actor/test_actor_create_proxy_configuration.py @@ -1,18 +1,12 @@ from __future__ import annotations -import typing from typing import TYPE_CHECKING -from unittest import mock -from unittest.mock import call import httpx import pytest from apify_client import ApifyClientAsync from apify_shared.consts import ApifyEnvVars -from crawlee._request import UserData -from crawlee._types import HttpHeaders, HttpMethod -from crawlee.http_clients import HttpResponse, HttpxHttpClient from apify import Actor @@ -147,128 +141,3 @@ async def test_proxy_configuration_with_actor_proxy_input( assert len(route.calls) == 2 await Actor.exit() - - -@pytest.mark.parametrize('request_method', typing.get_args(HttpMethod)) -@pytest.mark.parametrize( - 'optional_input', - [ - {}, - {'payload': 'some payload', 'userData': {'some key': 'some value'}, 'headers': {'h1': 'v1', 'h2': 'v2'}}, - ], - ids=['minimal', 'all_options'], -) -async def test_actor_create_request_list_request_types( - request_method: HttpMethod, optional_input: dict[str, str] -) -> None: - """Test proper request list generation from both minimal and full inputs for all method types for simple input.""" - minimal_request_dict_input = {'url': 'https://www.abc.com', 'method': request_method} - request_dict_input = {**minimal_request_dict_input, **optional_input} - example_start_urls_input = [ - request_dict_input, - ] - - generated_request_list = await Actor.create_request_list(actor_start_urls_input=example_start_urls_input) - - assert not await generated_request_list.is_empty() - generated_request = await generated_request_list.fetch_next_request() - assert await generated_request_list.is_empty() - - assert generated_request.method == request_dict_input['method'] - assert generated_request.url == request_dict_input['url'] - assert generated_request.payload == request_dict_input.get('payload', '').encode('utf-8') - expected_user_data = UserData() - if 'userData' in optional_input: - for key, value in optional_input['userData'].items(): - expected_user_data[key] = value - assert generated_request.user_data == expected_user_data - expected_headers = HttpHeaders(root=optional_input.get('headers', {})) - assert generated_request.headers == expected_headers - - -def _create_dummy_response(read_output: typing.Iterable[str]) -> HttpResponse: - """Create dummy_response that will iterate through read_output when called like dummy_response.read()""" - - class DummyResponse(HttpResponse): - @property - def http_version(self) -> str: - return '' - - @property - def status_code(self) -> int: - return 200 - - @property - def headers(self) -> HttpHeaders: - return HttpHeaders() - - def read(self) -> bytes: - return 
next(read_output).encode('utf-8') - - return DummyResponse() - - -async def test_actor_create_request_list_from_url_correctly_send_requests() -> None: - """Test that injected HttpClient's method send_request is called with properly passed arguments.""" - - example_start_urls_input = [ - {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, - {'requestsFromUrl': 'https://www.crawlee.dev/file2', 'method': 'PUT'}, - { - 'requestsFromUrl': 'https://www.something.som', - 'method': 'POST', - 'headers': {'key': 'value'}, - 'payload': 'some_payload', - 'userData': 'irrelevant', - }, - ] - mocked_read_outputs = ('' for url in example_start_urls_input) - http_client = HttpxHttpClient() - with mock.patch.object( - http_client, 'send_request', return_value=_create_dummy_response(mocked_read_outputs) - ) as mocked_send_request: - await Actor.create_request_list(actor_start_urls_input=example_start_urls_input, http_client=http_client) - - expected_calls = [ - call( - method=example_input['method'], - url=example_input['requestsFromUrl'], - headers=example_input.get('headers', {}), - payload=example_input.get('payload', '').encode('utf-8'), - ) - for example_input in example_start_urls_input - ] - mocked_send_request.assert_has_calls(expected_calls) - - -async def test_actor_create_request_list_from_url() -> None: - """Test that create_request_list is correctly reading urls from remote url sources and also from simple input.""" - expected_simple_url = 'https://www.someurl.com' - expected_remote_urls_1 = {'http://www.something.com', 'https://www.somethingelse.com', 'http://www.bla.net'} - expected_remote_urls_2 = {'http://www.ok.com', 'https://www.true-positive.com'} - expected_urls = expected_remote_urls_1 | expected_remote_urls_2 | {expected_simple_url} - response_bodies = iter( - ( - 'blablabla{} more blablabla{} , even more blablabla. {} '.format(*expected_remote_urls_1), - 'some stuff{} more stuff{} www.falsepositive www.false_positive.com'.format(*expected_remote_urls_2), - ) - ) - - example_start_urls_input = [ - {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, - {'url': expected_simple_url, 'method': 'GET'}, - {'requestsFromUrl': 'https://www.crawlee.dev/file2', 'method': 'GET'}, - ] - - http_client = HttpxHttpClient() - with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): - generated_request_list = await Actor.create_request_list( - actor_start_urls_input=example_start_urls_input, http_client=http_client - ) - generated_requests = [] - while request := await generated_request_list.fetch_next_request(): - print(request) - generated_requests.append(request) - - # Check correctly created requests' urls in request list - assert {generated_request.url for generated_request in generated_requests} == expected_urls diff --git a/tests/unit/actor/test_actor_create_request_list.py b/tests/unit/actor/test_actor_create_request_list.py new file mode 100644 index 00000000..c4528f3d --- /dev/null +++ b/tests/unit/actor/test_actor_create_request_list.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +import typing +from unittest import mock +from unittest.mock import call + +import pytest + +from crawlee._request import UserData # TODO: Make public in Crawlee? +from crawlee._types import HttpHeaders, HttpMethod # TODO: Make public in Crawlee? 
+from crawlee.http_clients import HttpResponse, HttpxHttpClient + +from apify import Actor + + +@pytest.mark.parametrize('request_method', typing.get_args(HttpMethod)) +@pytest.mark.parametrize( + 'optional_input', + [ + {}, + {'payload': 'some payload', 'user_data': {'some key': 'some value'}, 'headers': {'h1': 'v1', 'h2': 'v2'}}, + ], + ids=['minimal', 'all_options'], +) +async def test_actor_create_request_list_request_types( + request_method: HttpMethod, optional_input: dict[str, typing.Any] +) -> None: + """Test proper request list generation from both minimal and full inputs for all method types for simple input.""" + minimal_request_dict_input = {'url': 'https://www.abc.com', 'method': request_method} + request_dict_input = {**minimal_request_dict_input, **optional_input} + example_start_urls_input = [ + request_dict_input, + ] + + generated_request_list = await Actor.create_request_list(actor_start_urls_input=example_start_urls_input) + + assert not await generated_request_list.is_empty() + generated_request = await generated_request_list.fetch_next_request() + assert generated_request is not None + assert await generated_request_list.is_empty() + + assert generated_request.method == request_dict_input['method'] + assert generated_request.url == request_dict_input['url'] + assert generated_request.payload == request_dict_input.get('payload', '').encode('utf-8') + expected_user_data = UserData() + if 'user_data' in optional_input: + for key, value in optional_input['user_data'].items(): + expected_user_data[key] = value + assert generated_request.user_data == expected_user_data + expected_headers = HttpHeaders(root=optional_input.get('headers', {})) + assert generated_request.headers == expected_headers + + +def _create_dummy_response(read_output: typing.Iterator[str]) -> HttpResponse: + """Create dummy_response that will iterate through read_output when called like dummy_response.read()""" + + class DummyResponse(HttpResponse): + @property + def http_version(self) -> str: + return '' + + @property + def status_code(self) -> int: + return 200 + + @property + def headers(self) -> HttpHeaders: + return HttpHeaders() + + def read(self) -> bytes: + return next(read_output).encode('utf-8') + + return DummyResponse() + + +async def test_actor_create_request_list_from_url_correctly_send_requests() -> None: + """Test that injected HttpClient's method send_request is called with properly passed arguments.""" + + example_start_urls_input: list[dict[str, typing.Any]] = [ + {'requests_from_url': 'https://crawlee.dev/file.txt', 'method': 'GET'}, + {'requests_from_url': 'https://www.crawlee.dev/file2', 'method': 'PUT'}, + { + 'requests_from_url': 'https://www.something.som', + 'method': 'POST', + 'headers': {'key': 'value'}, + 'payload': 'some_payload', + 'user_data': {'another_key': 'another_value'}, + }, + ] + mocked_read_outputs = ('' for url in example_start_urls_input) + http_client = HttpxHttpClient() + with mock.patch.object( + http_client, 'send_request', return_value=_create_dummy_response(mocked_read_outputs) + ) as mocked_send_request: + await Actor.create_request_list(actor_start_urls_input=example_start_urls_input, http_client=http_client) + + expected_calls = [ + call( + method=example_input['method'], + url=example_input['requests_from_url'], + headers=example_input.get('headers', {}), + payload=example_input.get('payload', '').encode('utf-8'), + ) + for example_input in example_start_urls_input + ] + mocked_send_request.assert_has_calls(expected_calls) + + +async def 
test_actor_create_request_list_from_url() -> None: + """Test that create_request_list is correctly reading urls from remote url sources and also from simple input.""" + expected_simple_url = 'https://www.someurl.com' + expected_remote_urls_1 = {'http://www.something.com', 'https://www.somethingelse.com', 'http://www.bla.net'} + expected_remote_urls_2 = {'http://www.ok.com', 'https://www.true-positive.com'} + expected_urls = expected_remote_urls_1 | expected_remote_urls_2 | {expected_simple_url} + response_bodies = iter( + ( + 'blablabla{} more blablabla{} , even more blablabla. {} '.format(*expected_remote_urls_1), + 'some stuff{} more stuff{} www.falsepositive www.false_positive.com'.format(*expected_remote_urls_2), + ) + ) + + example_start_urls_input = [ + {'requests_from_url': 'https://crawlee.dev/file.txt', 'method': 'GET'}, + {'url': expected_simple_url, 'method': 'GET'}, + {'requests_from_url': 'https://www.crawlee.dev/file2', 'method': 'GET'}, + ] + + http_client = HttpxHttpClient() + with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): + generated_request_list = await Actor.create_request_list( + actor_start_urls_input=example_start_urls_input, http_client=http_client + ) + generated_requests = [] + while request := await generated_request_list.fetch_next_request(): + print(request) + generated_requests.append(request) + + # Check correctly created requests' urls in request list + assert {generated_request.url for generated_request in generated_requests} == expected_urls From f2c24404e62d4d1668767ebfe68d4fe973257220 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Fri, 15 Nov 2024 16:15:26 +0100 Subject: [PATCH 06/20] Properly pass request creation settings. TODO: Finish test for it. WIP --- src/apify/_actor_inputs.py | 28 +++++++++++++------ .../actor/test_actor_create_request_list.py | 8 ++++-- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/apify/_actor_inputs.py b/src/apify/_actor_inputs.py index 921064aa..088b9aa5 100644 --- a/src/apify/_actor_inputs.py +++ b/src/apify/_actor_inputs.py @@ -5,6 +5,7 @@ from asyncio import Task from typing import Any +from functools import partial from pydantic import BaseModel, Field from crawlee import Request @@ -67,23 +68,34 @@ def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> lis async def _create_requests_from_url( remote_url_requests_inputs: list[_RequestsFromUrlInput], http_client: BaseHttpClient ) -> list[Request]: + """Crete list of requests from url. + + Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Run extracting + callback on each response body and use URL_NO_COMMAS_REGEX regexp to find all links. Create list of Requests from + collected links and additional inputs stored in other attributes of each remote_url_requests_inputs. 
+ """ created_requests: list[Request] = [] - def extract_requests_from_response(task: Task) -> list[Request]: + def create_requests_from_response(request_input: _SimpleUrlInput, task: Task) -> list[Request]: + """Callback to scrape response body with regexp and create Requests from macthes.""" matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8')) - created_requests.extend([Request.from_url(match.group(0)) for match in matches]) + created_requests.extend([Request.from_url( + match.group(0), + method=request_input.method, + payload=request_input.payload.encode('utf-8'), + headers=request_input.headers, + user_data=request_input.user_data) for match in matches]) remote_url_requests = [] - for request_input in remote_url_requests_inputs: + for remote_url_requests_input in remote_url_requests_inputs: task = asyncio.create_task( http_client.send_request( - method=request_input.method, - url=request_input.requests_from_url, - headers=request_input.headers, - payload=request_input.payload.encode('utf-8'), + method='GET', + url=remote_url_requests_input.requests_from_url, ) ) - task.add_done_callback(extract_requests_from_response) + + task.add_done_callback(partial(create_requests_from_response, remote_url_requests_input)) remote_url_requests.append(task) await asyncio.gather(*remote_url_requests) diff --git a/tests/unit/actor/test_actor_create_request_list.py b/tests/unit/actor/test_actor_create_request_list.py index c4528f3d..f77812ca 100644 --- a/tests/unit/actor/test_actor_create_request_list.py +++ b/tests/unit/actor/test_actor_create_request_list.py @@ -96,10 +96,8 @@ async def test_actor_create_request_list_from_url_correctly_send_requests() -> N expected_calls = [ call( - method=example_input['method'], + method='GET', url=example_input['requests_from_url'], - headers=example_input.get('headers', {}), - payload=example_input.get('payload', '').encode('utf-8'), ) for example_input in example_start_urls_input ] @@ -137,3 +135,7 @@ async def test_actor_create_request_list_from_url() -> None: # Check correctly created requests' urls in request list assert {generated_request.url for generated_request in generated_requests} == expected_urls + +async def test_actor_create_request_list_from_url_additional_inputs() -> None: + assert False + # TODO test that will check that additional properties, like payload, headers request type are all properly passed. From 5af94058bb64e9c7430f36a18cedd60e0a9e458c Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Sun, 17 Nov 2024 11:21:17 +0100 Subject: [PATCH 07/20] Add tests fro regexp. Add test for checking all genrated request properties. 
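
For context on what the new regexp tests below cover, this is roughly how URL_NO_COMMAS_REGEX is consumed when scanning a remote response body for links (a standalone sketch, not part of the diff; the sample text is made up):

    import re

    from apify._actor_inputs import URL_NO_COMMAS_REGEX

    # Only absolute http(s) URLs are extracted; bare domains without a scheme are ignored.
    body = 'intro http://www.something.com text\nmore https://with-hypen.com, www.no-scheme.com'
    urls = [match.group(0) for match in re.finditer(URL_NO_COMMAS_REGEX, body)]
    assert urls == ['http://www.something.com', 'https://with-hypen.com']
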
--- src/apify/_actor_inputs.py | 2 +- .../actor/test_actor_create_request_list.py | 73 +++++++++++++++++-- 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/src/apify/_actor_inputs.py b/src/apify/_actor_inputs.py index 088b9aa5..d4048cb0 100644 --- a/src/apify/_actor_inputs.py +++ b/src/apify/_actor_inputs.py @@ -3,9 +3,9 @@ import asyncio import re from asyncio import Task +from functools import partial from typing import Any -from functools import partial from pydantic import BaseModel, Field from crawlee import Request diff --git a/tests/unit/actor/test_actor_create_request_list.py b/tests/unit/actor/test_actor_create_request_list.py index f77812ca..3114f8a2 100644 --- a/tests/unit/actor/test_actor_create_request_list.py +++ b/tests/unit/actor/test_actor_create_request_list.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re import typing from unittest import mock from unittest.mock import call @@ -11,6 +12,7 @@ from crawlee.http_clients import HttpResponse, HttpxHttpClient from apify import Actor +from apify._actor_inputs import URL_NO_COMMAS_REGEX @pytest.mark.parametrize('request_method', typing.get_args(HttpMethod)) @@ -47,8 +49,7 @@ async def test_actor_create_request_list_request_types( for key, value in optional_input['user_data'].items(): expected_user_data[key] = value assert generated_request.user_data == expected_user_data - expected_headers = HttpHeaders(root=optional_input.get('headers', {})) - assert generated_request.headers == expected_headers + assert generated_request.headers.root == optional_input.get('headers', {}) def _create_dummy_response(read_output: typing.Iterator[str]) -> HttpResponse: @@ -130,12 +131,74 @@ async def test_actor_create_request_list_from_url() -> None: ) generated_requests = [] while request := await generated_request_list.fetch_next_request(): - print(request) generated_requests.append(request) # Check correctly created requests' urls in request list assert {generated_request.url for generated_request in generated_requests} == expected_urls async def test_actor_create_request_list_from_url_additional_inputs() -> None: - assert False - # TODO test that will check that additional properties, like payload, headers request type are all properly passed. 
+ """Test that all generated request properties are correctly populated from input values.""" + expected_simple_url = 'https://www.someurl.com' + example_start_urls_input = [ + {'requests_from_url': 'https://crawlee.dev/file.txt', 'method': 'POST', + 'headers': {'key': 'value'}, + 'payload': 'some_payload', + 'user_data': {'another_key': 'another_value'}}, + ] + response_bodies = iter((expected_simple_url,)) + http_client = HttpxHttpClient() + with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): + generated_request_list = await Actor.create_request_list( + actor_start_urls_input=example_start_urls_input, http_client=http_client + ) + request = await generated_request_list.fetch_next_request() + + # Check all properties correctly created for request + assert request.url == expected_simple_url + assert request.method == example_start_urls_input[0]['method'] + assert request.headers.root == example_start_urls_input[0]['headers'] + assert request.payload == example_start_urls_input[0]['payload'].encode('utf-8') + expected_user_data = UserData() + for key, value in example_start_urls_input[0]['user_data'].items(): + expected_user_data[key] = value + assert request.user_data == expected_user_data + + +@pytest.mark.parametrize('true_positive', [ + 'http://www.something.com', + 'https://www.something.net', + 'http://nowww.cz', + 'https://with-hypen.com', + 'http://number1.com', + 'http://www.number.123', + 'http://many.dots.com', + 'http://a.com', + 'http://www.something.com/somethignelse' + 'http://www.something.com/somethignelse.txt', + # "http://non-english-chars-รก.com" # re module not suitable, regex can do this with \p{L}. Do we want this? +]) +def test_url_no_commas_regex_true_positives(true_positive: str) -> None: + example_string= f'Some text {true_positive} some more text' + matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string)) + assert len(matches) == 1 + assert matches[0].group(0) == true_positive + +@pytest.mark.parametrize('false_positive',[ + 'http://www.a', + 'http://a', + 'http://a.a', + 'http://123.456', + 'www.something.com', + 'http:www.something.com', +]) +def test_url_no_commas_regex_false_positives(false_positive: str) -> None: + example_string= f'Some text {false_positive} some more text' + matches = list(re.findall(URL_NO_COMMAS_REGEX, example_string)) + assert len(matches) == 0 + +def test_url_no_commas_regex_multi_line() -> None: + true_positives = ('http://www.something.com', 'http://www.else.com') + example_string= 'Some text {} some more text \n Some new line text {} ...'.format(*true_positives) + matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string)) + assert len(matches) == 2 + assert {match.group(0) for match in matches} == set(true_positives) From cf4534a08b6a7cd6baf463aae7ed92f543ddc293 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Mon, 18 Nov 2024 17:12:19 +0100 Subject: [PATCH 08/20] Use regex instead of re. Add helper class for input keys Add top level Input class for handling actor inputs. 
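
The switch from re to the third-party regex package is motivated by Unicode property classes: the JS pattern quoted in an earlier patch relies on \p{L}, which re cannot express but regex can (see the commented-out non-ASCII case in the regexp tests from the previous patch). A small illustration, assuming only that regex is installed; the exact pattern adopted by this change is not reproduced here:

    import regex

    # \p{L} matches any Unicode letter, so internationalized hostnames are accepted.
    pattern = regex.compile(r'https?://(www\.)?[\p{L}0-9][-\p{L}0-9._]*\.[a-z]{2,63}')
    assert pattern.search('http://non-english-chars-á.com')
    assert not regex.match(r'[a-zA-Z0-9]+', 'á')  # the ASCII-only class would miss this letter
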
--- poetry.lock | 116 +++++++++++++++- pyproject.toml | 2 + src/apify/_actor.py | 19 --- src/apify/storages/__init__.py | 4 +- src/apify/{ => storages}/_actor_inputs.py | 70 +++++++--- src/apify/storages/_known_actor_input_keys.py | 28 ++++ ...e_request_list.py => test_actor_inputs.py} | 129 +++++++++--------- 7 files changed, 261 insertions(+), 107 deletions(-) rename src/apify/{ => storages}/_actor_inputs.py (56%) create mode 100644 src/apify/storages/_known_actor_input_keys.py rename tests/unit/actor/{test_actor_create_request_list.py => test_actor_inputs.py} (52%) diff --git a/poetry.lock b/poetry.lock index 768801be..f94db0ae 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2449,6 +2449,109 @@ files = [ {file = "readchar-4.2.1.tar.gz", hash = "sha256:91ce3faf07688de14d800592951e5575e9c7a3213738ed01d394dcc949b79adb"}, ] +[[package]] +name = "regex" +version = "2024.11.6" +description = "Alternative regular expression module, to replace re." +optional = false +python-versions = ">=3.8" +files = [ + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62"}, + {file = "regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e"}, + {file = "regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = 
"sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45"}, + {file = "regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9"}, + {file = "regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad"}, + {file = "regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54"}, + {file = "regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d"}, + {file = "regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff"}, + {file = 
"regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f"}, + {file = "regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4"}, + {file = "regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b"}, + {file = "regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57"}, + {file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"}, + {file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"}, +] + [[package]] name = "requests" version = "2.32.3" @@ -2823,6 +2926,17 @@ files = [ {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"}, ] +[[package]] +name = "types-regex" +version = "2024.11.6.20241108" +description = "Typing stubs for regex" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-regex-2024.11.6.20241108.tar.gz", hash = "sha256:a774e307b99b3df49481b29e8b4962f021693052a8d8a2b9e6792fcec896cf5e"}, + {file = "types_regex-2024.11.6.20241108-py3-none-any.whl", hash = "sha256:adec2ff2dfed00855551057334466fde923606599d01e7440556d53a3ef20835"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -3215,4 +3329,4 @@ scrapy = ["scrapy"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "3698d5b2d562a7a83489d316a70b6685d4276f9aa9adb904ea5f39479cc8eeee" +content-hash = "bd3a28d080a0548f41c8d9a50ede2725a8b12985203f2afebd98b7607471b003" diff --git a/pyproject.toml b/pyproject.toml index c3a01c41..dc4f56c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ lazy-object-proxy = ">=1.10.0" scrapy = { version = ">=2.11.0", optional = true } typing-extensions = ">=4.1.0" websockets = ">=10.0 <14.0.0" +regex = "^2024.11.6" [tool.poetry.group.dev.dependencies] build = "~1.2.0" @@ -72,6 +73,7 @@ pytest-xdist = "~3.6.0" respx = "~0.21.0" ruff = "~0.7.0" setuptools = "~75.0.0" # setuptools are used by pytest but not explicitly required +types-regex = "^2024.11.6.20241108" [tool.poetry.extras] scrapy = ["scrapy"] diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 872416a0..f60a99df 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -16,7 +16,6 @@ from crawlee import 
service_container from crawlee.events._types import Event, EventPersistStateData -from apify._actor_inputs import _create_request_list from apify._configuration import Configuration from apify._consts import EVENT_LISTENERS_TIMEOUT from apify._crypto import decrypt_input_secrets, load_private_key @@ -32,9 +31,7 @@ import logging from types import TracebackType - from crawlee.http_clients import BaseHttpClient from crawlee.proxy_configuration import _NewUrlFunction - from crawlee.storages import RequestList from apify._models import Webhook @@ -977,22 +974,6 @@ async def create_proxy_configuration( return proxy_configuration - @staticmethod - async def create_request_list( - *, actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None - ) -> RequestList: - """Creates request list from Actor input requestListSources. This accepts list of urls and requests_from_url. - - Example: - actor_start_urls_input = [ - # Gather urls from response body. - {'requests_from_url': 'https://crawlee.dev/file.txt', 'method': 'GET'}, - # Directly include this url. - {'url': 'https://crawlee.dev', 'method': 'GET'} - ] - """ - return await _create_request_list(actor_start_urls_input=actor_start_urls_input, http_client=http_client) - Actor = cast(_ActorType, Proxy(_ActorType)) """The entry point of the SDK, through which all the Actor operations should be done.""" diff --git a/src/apify/storages/__init__.py b/src/apify/storages/__init__.py index 2ed85e84..8fd33ba3 100644 --- a/src/apify/storages/__init__.py +++ b/src/apify/storages/__init__.py @@ -1,3 +1,5 @@ from crawlee.storages import Dataset, KeyValueStore, RequestQueue -__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue'] +from ._actor_inputs import Input + +__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue', 'Input'] diff --git a/src/apify/_actor_inputs.py b/src/apify/storages/_actor_inputs.py similarity index 56% rename from src/apify/_actor_inputs.py rename to src/apify/storages/_actor_inputs.py index d4048cb0..c53a051a 100644 --- a/src/apify/_actor_inputs.py +++ b/src/apify/storages/_actor_inputs.py @@ -1,47 +1,79 @@ from __future__ import annotations import asyncio -import re from asyncio import Task from functools import partial -from typing import Any +from typing import TYPE_CHECKING, Any -from pydantic import BaseModel, Field +import regex +from pydantic import BaseModel, ConfigDict, Field + +if TYPE_CHECKING: + from typing_extensions import Self from crawlee import Request -from crawlee._types import HttpMethod # TODO: Make public in Crawlee? +from crawlee._types import HttpMethod from crawlee.http_clients import BaseHttpClient, HttpxHttpClient from crawlee.storages import RequestList -URL_NO_COMMAS_REGEX = re.compile( - r'https?:\/\/(www\.)?([a-zA-Z0-9]|[a-zA-Z0-9][-a-zA-Z0-9@:%._+~#=]{0,254}[a-zA-Z0-9])\.[a-z]{2,63}(:\d{1,5})?(\/[-a-zA-Z0-9@:%_+.~#?&/=()]*)?' +from ._known_actor_input_keys import ActorInputKeys + +URL_NO_COMMAS_REGEX = regex.compile( + r'https?:\/\/(www\.)?([\p{L}0-9]|[\p{L}0-9][-\p{L}0-9@:%._+~#=]{0,254}[\p{L}0-9])\.[a-z]{2,63}(:\d{1,5})?(\/[-\p{L}0-9@:%_+.~#?&/=()]*)?' 
) + + class _RequestDetails(BaseModel): method: HttpMethod payload: str = '' headers: dict[str, str] = Field(default_factory=dict) - user_data: dict[str, str]= Field(default_factory=dict, alias='user_data') + user_data: dict[str, str]= Field(default_factory=dict, alias=ActorInputKeys.startUrls.userData) class _RequestsFromUrlInput(_RequestDetails): - requests_from_url: str = Field(alias='requests_from_url') + requests_from_url: str = Field(alias=ActorInputKeys.startUrls.requestsFromUrl) + class _SimpleUrlInput(_RequestDetails): url: str +class Input(BaseModel): + model_config = ConfigDict(arbitrary_types_allowed=True) + start_urls: RequestList + + @classmethod + async def read(cls, raw_input: dict[str, Any], http_client: BaseHttpClient | None = None) -> Self: + if ActorInputKeys.startUrls in raw_input: + request_list = await _create_request_list( + actor_start_urls_input=raw_input[ActorInputKeys.startUrls], http_client=http_client) + else: + request_list = RequestList() + return cls(start_urls=request_list) -@staticmethod async def _create_request_list( *, actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None ) -> RequestList: + """Creates RequestList from Actor input requestListSources. + + actor_start_urls_input can contain list dicts with either url or requestsFromUrl key + http_client is client that will be used to send get request to url defined in requestsFromUrl + + Example: + actor_start_urls_input = [ + # Gather urls from response body. + {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, + # Directly include this url. + {'url': 'https://crawlee.dev', 'method': 'GET'} + ] + """ if not http_client: http_client = HttpxHttpClient() simple_url_requests_inputs = [ _SimpleUrlInput(**request_input) for request_input in actor_start_urls_input - if 'url' in request_input] + if ActorInputKeys.startUrls.url in request_input] remote_url_requests_inputs = [ _RequestsFromUrlInput(**request_input) for request_input in actor_start_urls_input - if 'requests_from_url' in request_input + if ActorInputKeys.startUrls.requestsFromUrl in request_input ] simple_url_requests = _create_requests_from_input(simple_url_requests_inputs) @@ -50,7 +82,6 @@ async def _create_request_list( return RequestList(requests=simple_url_requests + remote_url_requests) -@staticmethod def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> list[Request]: return [ Request.from_url( @@ -64,21 +95,20 @@ def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> lis ] -@staticmethod async def _create_requests_from_url( remote_url_requests_inputs: list[_RequestsFromUrlInput], http_client: BaseHttpClient ) -> list[Request]: """Crete list of requests from url. Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Run extracting - callback on each response body and use URL_NO_COMMAS_REGEX regexp to find all links. Create list of Requests from + callback on each response body and use URL_NO_COMMAS_REGEX regex to find all links. Create list of Requests from collected links and additional inputs stored in other attributes of each remote_url_requests_inputs. 
""" created_requests: list[Request] = [] - def create_requests_from_response(request_input: _SimpleUrlInput, task: Task) -> list[Request]: - """Callback to scrape response body with regexp and create Requests from macthes.""" - matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8')) + def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None: + """Callback to scrape response body with regexp and create Requests from matches.""" + matches = regex.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8')) created_requests.extend([Request.from_url( match.group(0), method=request_input.method, @@ -88,15 +118,15 @@ def create_requests_from_response(request_input: _SimpleUrlInput, task: Task) -> remote_url_requests = [] for remote_url_requests_input in remote_url_requests_inputs: - task = asyncio.create_task( + get_response_task = asyncio.create_task( http_client.send_request( method='GET', url=remote_url_requests_input.requests_from_url, ) ) - task.add_done_callback(partial(create_requests_from_response, remote_url_requests_input)) - remote_url_requests.append(task) + get_response_task.add_done_callback(partial(create_requests_from_response, remote_url_requests_input)) + remote_url_requests.append(get_response_task) await asyncio.gather(*remote_url_requests) return created_requests diff --git a/src/apify/storages/_known_actor_input_keys.py b/src/apify/storages/_known_actor_input_keys.py new file mode 100644 index 00000000..49347393 --- /dev/null +++ b/src/apify/storages/_known_actor_input_keys.py @@ -0,0 +1,28 @@ + + +class _KnownInputKey(str): + __slots__ = ('_name',) + def __init__(self, name: str) -> None: + self._name = name + + def __str__(self) -> str: + return self._name + + def __repr__(self) ->str: + return str(self) + +class _StartUrls(_KnownInputKey): + url='url' + requestsFromUrl = 'requestsFromUrl' # noqa: N815 # Intentional to respect actual naming of input keys. + method='method' + payload= 'payload' + userData='userData' # noqa: N815 # Intentional to respect actual naming of input keys. + headers='headers' + + +class _ActorInputKeys: + # Helper class to have actor input strings all in one place and easy to use with code completion. + startUrls: _StartUrls = _StartUrls('startUrls') # noqa: N815 # Intentional to respect actual naming of input keys. + # More inputs should be gradually added + +ActorInputKeys = _ActorInputKeys() diff --git a/tests/unit/actor/test_actor_create_request_list.py b/tests/unit/actor/test_actor_inputs.py similarity index 52% rename from tests/unit/actor/test_actor_create_request_list.py rename to tests/unit/actor/test_actor_inputs.py index 3114f8a2..4d7ef83e 100644 --- a/tests/unit/actor/test_actor_create_request_list.py +++ b/tests/unit/actor/test_actor_inputs.py @@ -1,58 +1,57 @@ from __future__ import annotations -import re -import typing +from typing import Any, Iterator, get_args from unittest import mock from unittest.mock import call import pytest +import regex -from crawlee._request import UserData # TODO: Make public in Crawlee? -from crawlee._types import HttpHeaders, HttpMethod # TODO: Make public in Crawlee? 
+from crawlee._request import UserData +from crawlee._types import HttpHeaders, HttpMethod from crawlee.http_clients import HttpResponse, HttpxHttpClient -from apify import Actor -from apify._actor_inputs import URL_NO_COMMAS_REGEX +from apify.storages._actor_inputs import URL_NO_COMMAS_REGEX, ActorInputKeys, Input -@pytest.mark.parametrize('request_method', typing.get_args(HttpMethod)) +@pytest.mark.parametrize('request_method', get_args(HttpMethod)) @pytest.mark.parametrize( 'optional_input', [ {}, - {'payload': 'some payload', 'user_data': {'some key': 'some value'}, 'headers': {'h1': 'v1', 'h2': 'v2'}}, + {ActorInputKeys.startUrls.payload: 'some payload', ActorInputKeys.startUrls.userData: + {'some key': 'some value'}, ActorInputKeys.startUrls.headers: {'h1': 'v1', 'h2': 'v2'}}, ], ids=['minimal', 'all_options'], ) async def test_actor_create_request_list_request_types( - request_method: HttpMethod, optional_input: dict[str, typing.Any] + request_method: HttpMethod, optional_input: dict[str, Any] ) -> None: """Test proper request list generation from both minimal and full inputs for all method types for simple input.""" - minimal_request_dict_input = {'url': 'https://www.abc.com', 'method': request_method} + minimal_request_dict_input = {ActorInputKeys.startUrls.url: 'https://www.abc.com', + ActorInputKeys.startUrls.method: request_method} request_dict_input = {**minimal_request_dict_input, **optional_input} - example_start_urls_input = [ - request_dict_input, - ] + example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [request_dict_input]} - generated_request_list = await Actor.create_request_list(actor_start_urls_input=example_start_urls_input) + generated_input = await Input.read(example_actor_input) - assert not await generated_request_list.is_empty() - generated_request = await generated_request_list.fetch_next_request() + assert not await generated_input.start_urls.is_empty() + generated_request = await generated_input.start_urls.fetch_next_request() assert generated_request is not None - assert await generated_request_list.is_empty() + assert await generated_input.start_urls.is_empty() - assert generated_request.method == request_dict_input['method'] - assert generated_request.url == request_dict_input['url'] - assert generated_request.payload == request_dict_input.get('payload', '').encode('utf-8') + assert generated_request.method == request_dict_input[ActorInputKeys.startUrls.method] + assert generated_request.url == request_dict_input[ActorInputKeys.startUrls.url] + assert generated_request.payload == request_dict_input.get(ActorInputKeys.startUrls.payload, '').encode('utf-8') expected_user_data = UserData() - if 'user_data' in optional_input: - for key, value in optional_input['user_data'].items(): + if ActorInputKeys.startUrls.userData in optional_input: + for key, value in optional_input[ActorInputKeys.startUrls.userData].items(): expected_user_data[key] = value assert generated_request.user_data == expected_user_data - assert generated_request.headers.root == optional_input.get('headers', {}) + assert generated_request.headers.root == optional_input.get(ActorInputKeys.startUrls.headers, {}) -def _create_dummy_response(read_output: typing.Iterator[str]) -> HttpResponse: +def _create_dummy_response(read_output: Iterator[str]) -> HttpResponse: """Create dummy_response that will iterate through read_output when called like dummy_response.read()""" class DummyResponse(HttpResponse): @@ -76,31 +75,31 @@ def read(self) -> bytes: async def 
test_actor_create_request_list_from_url_correctly_send_requests() -> None: """Test that injected HttpClient's method send_request is called with properly passed arguments.""" - - example_start_urls_input: list[dict[str, typing.Any]] = [ - {'requests_from_url': 'https://crawlee.dev/file.txt', 'method': 'GET'}, - {'requests_from_url': 'https://www.crawlee.dev/file2', 'method': 'PUT'}, + example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [ + {ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', ActorInputKeys.startUrls.method: 'GET'}, + {ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', ActorInputKeys.startUrls.method: 'PUT'}, { - 'requests_from_url': 'https://www.something.som', - 'method': 'POST', - 'headers': {'key': 'value'}, - 'payload': 'some_payload', - 'user_data': {'another_key': 'another_value'}, + ActorInputKeys.startUrls.requestsFromUrl: 'https://www.something.som', + ActorInputKeys.startUrls.method: 'POST', + ActorInputKeys.startUrls.headers: {'key': 'value'}, + ActorInputKeys.startUrls.payload: 'some_payload', + ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}, }, - ] - mocked_read_outputs = ('' for url in example_start_urls_input) + ]} + + mocked_read_outputs = ('' for url in example_actor_input[ActorInputKeys.startUrls]) http_client = HttpxHttpClient() with mock.patch.object( http_client, 'send_request', return_value=_create_dummy_response(mocked_read_outputs) ) as mocked_send_request: - await Actor.create_request_list(actor_start_urls_input=example_start_urls_input, http_client=http_client) + await Input.read(example_actor_input, http_client=http_client) expected_calls = [ call( method='GET', - url=example_input['requests_from_url'], + url=example_input[ActorInputKeys.startUrls.requestsFromUrl], ) - for example_input in example_start_urls_input + for example_input in example_actor_input[ActorInputKeys.startUrls] ] mocked_send_request.assert_has_calls(expected_calls) @@ -118,19 +117,17 @@ async def test_actor_create_request_list_from_url() -> None: ) ) - example_start_urls_input = [ - {'requests_from_url': 'https://crawlee.dev/file.txt', 'method': 'GET'}, - {'url': expected_simple_url, 'method': 'GET'}, - {'requests_from_url': 'https://www.crawlee.dev/file2', 'method': 'GET'}, - ] + example_actor_input:dict[str, Any] = {ActorInputKeys.startUrls:[ + {ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', ActorInputKeys.startUrls.method: 'GET'}, + {ActorInputKeys.startUrls.url: expected_simple_url, ActorInputKeys.startUrls.method: 'GET'}, + {ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', ActorInputKeys.startUrls.method: 'GET'}, + ]} http_client = HttpxHttpClient() with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): - generated_request_list = await Actor.create_request_list( - actor_start_urls_input=example_start_urls_input, http_client=http_client - ) + generated_input = await Input.read(example_actor_input, http_client=http_client) generated_requests = [] - while request := await generated_request_list.fetch_next_request(): + while request := await generated_input.start_urls.fetch_next_request(): generated_requests.append(request) # Check correctly created requests' urls in request list @@ -139,27 +136,27 @@ async def test_actor_create_request_list_from_url() -> None: async def test_actor_create_request_list_from_url_additional_inputs() -> None: """Test that all generated request properties are correctly populated 
from input values.""" expected_simple_url = 'https://www.someurl.com' - example_start_urls_input = [ - {'requests_from_url': 'https://crawlee.dev/file.txt', 'method': 'POST', - 'headers': {'key': 'value'}, - 'payload': 'some_payload', - 'user_data': {'another_key': 'another_value'}}, - ] + example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls:[ + {ActorInputKeys.startUrls.requestsFromUrl: 'https://crawlee.dev/file.txt', 'method': 'POST', + ActorInputKeys.startUrls.headers: {'key': 'value'}, + ActorInputKeys.startUrls.payload: 'some_payload', + ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}}, + ]} response_bodies = iter((expected_simple_url,)) http_client = HttpxHttpClient() with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): - generated_request_list = await Actor.create_request_list( - actor_start_urls_input=example_start_urls_input, http_client=http_client - ) - request = await generated_request_list.fetch_next_request() + generated_input = await Input.read(example_actor_input, http_client=http_client) + request = await generated_input.start_urls.fetch_next_request() # Check all properties correctly created for request + assert request assert request.url == expected_simple_url - assert request.method == example_start_urls_input[0]['method'] - assert request.headers.root == example_start_urls_input[0]['headers'] - assert request.payload == example_start_urls_input[0]['payload'].encode('utf-8') + assert request.method == example_actor_input[ActorInputKeys.startUrls][0][ActorInputKeys.startUrls.method] + assert request.headers.root == example_actor_input[ActorInputKeys.startUrls][0][ActorInputKeys.startUrls.headers] + assert request.payload == example_actor_input[ActorInputKeys.startUrls][0][ActorInputKeys.startUrls.payload].encode( + 'utf-8') expected_user_data = UserData() - for key, value in example_start_urls_input[0]['user_data'].items(): + for key, value in example_actor_input[ActorInputKeys.startUrls][0][ActorInputKeys.startUrls.userData].items(): expected_user_data[key] = value assert request.user_data == expected_user_data @@ -170,16 +167,16 @@ async def test_actor_create_request_list_from_url_additional_inputs() -> None: 'http://nowww.cz', 'https://with-hypen.com', 'http://number1.com', - 'http://www.number.123', + 'http://www.number.123.abc', 'http://many.dots.com', 'http://a.com', 'http://www.something.com/somethignelse' 'http://www.something.com/somethignelse.txt', - # "http://non-english-chars-รก.com" # re module not suitable, regex can do this with \p{L}. Do we want this? 
+ 'http://non-english-chars-รกรญรฉรฅรผ.com' ]) def test_url_no_commas_regex_true_positives(true_positive: str) -> None: example_string= f'Some text {true_positive} some more text' - matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string)) + matches = list(regex.finditer(URL_NO_COMMAS_REGEX, example_string)) assert len(matches) == 1 assert matches[0].group(0) == true_positive @@ -193,12 +190,12 @@ def test_url_no_commas_regex_true_positives(true_positive: str) -> None: ]) def test_url_no_commas_regex_false_positives(false_positive: str) -> None: example_string= f'Some text {false_positive} some more text' - matches = list(re.findall(URL_NO_COMMAS_REGEX, example_string)) + matches = list(regex.findall(URL_NO_COMMAS_REGEX, example_string)) assert len(matches) == 0 def test_url_no_commas_regex_multi_line() -> None: true_positives = ('http://www.something.com', 'http://www.else.com') example_string= 'Some text {} some more text \n Some new line text {} ...'.format(*true_positives) - matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string)) + matches = list(regex.finditer(URL_NO_COMMAS_REGEX, example_string)) assert len(matches) == 2 assert {match.group(0) for match in matches} == set(true_positives) From ff3e047049281ee8840581cb7a70f680fea74921 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Mon, 18 Nov 2024 17:49:22 +0100 Subject: [PATCH 09/20] Use re with \w Add few more tests for regex --- poetry.lock | 105 +------------------------- pyproject.toml | 1 - src/apify/storages/_actor_inputs.py | 8 +- tests/unit/actor/test_actor_inputs.py | 12 +-- 4 files changed, 12 insertions(+), 114 deletions(-) diff --git a/poetry.lock b/poetry.lock index f94db0ae..f43b2f3a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2449,109 +2449,6 @@ files = [ {file = "readchar-4.2.1.tar.gz", hash = "sha256:91ce3faf07688de14d800592951e5575e9c7a3213738ed01d394dcc949b79adb"}, ] -[[package]] -name = "regex" -version = "2024.11.6" -description = "Alternative regular expression module, to replace re." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, - {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, - {file = "regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c"}, - {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86"}, - {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67"}, - {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d"}, - {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2"}, - {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008"}, - {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62"}, - {file = "regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e"}, - {file = "regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519"}, - {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638"}, - {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7"}, - {file = "regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"}, - {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114"}, - {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3"}, - {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f"}, - {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0"}, - {file = "regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55"}, - {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89"}, - {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d"}, - {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34"}, - {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d"}, - {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45"}, - {file = "regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9"}, - {file = "regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60"}, - {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a"}, - {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9"}, - {file = "regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2"}, - {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4"}, - {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577"}, - {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3"}, - {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e"}, - {file = "regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe"}, - {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e"}, - {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29"}, - {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39"}, - {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51"}, - {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad"}, - {file = 
"regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54"}, - {file = "regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b"}, - {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84"}, - {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4"}, - {file = "regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0"}, - {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0"}, - {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7"}, - {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7"}, - {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c"}, - {file = "regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3"}, - {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07"}, - {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e"}, - {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6"}, - {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4"}, - {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d"}, - {file = "regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff"}, - {file = "regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a"}, - {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b"}, - {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3"}, - {file = "regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f"}, - {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5"}, - {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df"}, - {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773"}, - {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c"}, - {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc"}, - {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f"}, - {file = "regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4"}, - {file = "regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001"}, - {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839"}, - {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e"}, - {file = "regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48"}, - {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13"}, - {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2"}, - {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95"}, - {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = 
"sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9"}, - {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f"}, - {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b"}, - {file = "regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57"}, - {file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"}, - {file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"}, -] - [[package]] name = "requests" version = "2.32.3" @@ -3329,4 +3226,4 @@ scrapy = ["scrapy"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "bd3a28d080a0548f41c8d9a50ede2725a8b12985203f2afebd98b7607471b003" +content-hash = "da388b618b4c9b95567d426529d0b7cda05d33909995c409e595c99e6f1767ff" diff --git a/pyproject.toml b/pyproject.toml index dc4f56c1..68091faa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,6 @@ lazy-object-proxy = ">=1.10.0" scrapy = { version = ">=2.11.0", optional = true } typing-extensions = ">=4.1.0" websockets = ">=10.0 <14.0.0" -regex = "^2024.11.6" [tool.poetry.group.dev.dependencies] build = "~1.2.0" diff --git a/src/apify/storages/_actor_inputs.py b/src/apify/storages/_actor_inputs.py index c53a051a..9437a578 100644 --- a/src/apify/storages/_actor_inputs.py +++ b/src/apify/storages/_actor_inputs.py @@ -1,11 +1,11 @@ from __future__ import annotations import asyncio +import re from asyncio import Task from functools import partial from typing import TYPE_CHECKING, Any -import regex from pydantic import BaseModel, ConfigDict, Field if TYPE_CHECKING: @@ -18,8 +18,8 @@ from ._known_actor_input_keys import ActorInputKeys -URL_NO_COMMAS_REGEX = regex.compile( - r'https?:\/\/(www\.)?([\p{L}0-9]|[\p{L}0-9][-\p{L}0-9@:%._+~#=]{0,254}[\p{L}0-9])\.[a-z]{2,63}(:\d{1,5})?(\/[-\p{L}0-9@:%_+.~#?&/=()]*)?' +URL_NO_COMMAS_REGEX = re.compile( + r'https?:\/\/(www\.)?([^\W_]|[^\W_][-\w0-9@:%._+~#=]{0,254}[^\W_])\.[a-z]{2,63}(:\d{1,5})?(\/[-\w@:%+.~#?&/=()]*)?' 
) @@ -108,7 +108,7 @@ async def _create_requests_from_url( def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None: """Callback to scrape response body with regexp and create Requests from matches.""" - matches = regex.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8')) + matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8')) created_requests.extend([Request.from_url( match.group(0), method=request_input.method, diff --git a/tests/unit/actor/test_actor_inputs.py b/tests/unit/actor/test_actor_inputs.py index 4d7ef83e..f039e549 100644 --- a/tests/unit/actor/test_actor_inputs.py +++ b/tests/unit/actor/test_actor_inputs.py @@ -1,11 +1,11 @@ from __future__ import annotations +import re from typing import Any, Iterator, get_args from unittest import mock from unittest.mock import call import pytest -import regex from crawlee._request import UserData from crawlee._types import HttpHeaders, HttpMethod @@ -172,11 +172,13 @@ async def test_actor_create_request_list_from_url_additional_inputs() -> None: 'http://a.com', 'http://www.something.com/somethignelse' 'http://www.something.com/somethignelse.txt', - 'http://non-english-chars-รกรญรฉรฅรผ.com' + 'http://non-english-chars-รกรญรฉรฅรผ.com', + 'http://www.port.com:1234', + 'http://username:password@something.apify.com' ]) def test_url_no_commas_regex_true_positives(true_positive: str) -> None: example_string= f'Some text {true_positive} some more text' - matches = list(regex.finditer(URL_NO_COMMAS_REGEX, example_string)) + matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string)) assert len(matches) == 1 assert matches[0].group(0) == true_positive @@ -190,12 +192,12 @@ def test_url_no_commas_regex_true_positives(true_positive: str) -> None: ]) def test_url_no_commas_regex_false_positives(false_positive: str) -> None: example_string= f'Some text {false_positive} some more text' - matches = list(regex.findall(URL_NO_COMMAS_REGEX, example_string)) + matches = list(re.findall(URL_NO_COMMAS_REGEX, example_string)) assert len(matches) == 0 def test_url_no_commas_regex_multi_line() -> None: true_positives = ('http://www.something.com', 'http://www.else.com') example_string= 'Some text {} some more text \n Some new line text {} ...'.format(*true_positives) - matches = list(regex.finditer(URL_NO_COMMAS_REGEX, example_string)) + matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string)) assert len(matches) == 2 assert {match.group(0) for match in matches} == set(true_positives) From b4ad24fb37db5d4a66431e7642576847a08c8e5e Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Mon, 18 Nov 2024 18:07:47 +0100 Subject: [PATCH 10/20] Reduce some test code repetition. 
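Editorial note on the regex change above: the pattern now relies on the standard `re` module with `\w` / `[^\W_]` classes, which are Unicode-aware in Python 3 and therefore still accept the non-English-letter URLs covered by the tests. A small self-contained sketch of the matching behaviour follows; the pattern is copied from `src/apify/storages/_actor_inputs.py` as patched, while the sample sentence and the expected output are illustrative assumptions, not part of the test suite.

    import re

    # Same pattern as in src/apify/storages/_actor_inputs.py after this series.
    URL_NO_COMMAS_REGEX = re.compile(
        r'https?:\/\/(www\.)?([^\W_]|[^\W_][-\w0-9@:%._+~#=]{0,254}[^\W_])\.[a-z]{2,63}(:\d{1,5})?(\/[-\w@:%+.~#?&/=()]*)?'
    )

    text = 'See http://www.example.com/page and https://docs.example.org:8080/path, thanks.'
    print([match.group(0) for match in re.finditer(URL_NO_COMMAS_REGEX, text)])
    # Expected: ['http://www.example.com/page', 'https://docs.example.org:8080/path']
    # The trailing comma is not part of the match, and a bare 'www.example.com'
    # without a scheme does not match at all.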
--- tests/unit/actor/test_actor_inputs.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/unit/actor/test_actor_inputs.py b/tests/unit/actor/test_actor_inputs.py index f039e549..08a3f155 100644 --- a/tests/unit/actor/test_actor_inputs.py +++ b/tests/unit/actor/test_actor_inputs.py @@ -136,12 +136,13 @@ async def test_actor_create_request_list_from_url() -> None: async def test_actor_create_request_list_from_url_additional_inputs() -> None: """Test that all generated request properties are correctly populated from input values.""" expected_simple_url = 'https://www.someurl.com' - example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls:[ - {ActorInputKeys.startUrls.requestsFromUrl: 'https://crawlee.dev/file.txt', 'method': 'POST', - ActorInputKeys.startUrls.headers: {'key': 'value'}, - ActorInputKeys.startUrls.payload: 'some_payload', - ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}}, - ]} + example_start_url_input = { + ActorInputKeys.startUrls.requestsFromUrl: 'https://crawlee.dev/file.txt', + ActorInputKeys.startUrls.method: 'POST', + ActorInputKeys.startUrls.headers: {'key': 'value'}, + ActorInputKeys.startUrls.payload: 'some_payload', + ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}} + example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls:[example_start_url_input]} response_bodies = iter((expected_simple_url,)) http_client = HttpxHttpClient() with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): @@ -149,12 +150,12 @@ async def test_actor_create_request_list_from_url_additional_inputs() -> None: request = await generated_input.start_urls.fetch_next_request() # Check all properties correctly created for request + example_start_url_input = example_actor_input[ActorInputKeys.startUrls][0] assert request assert request.url == expected_simple_url - assert request.method == example_actor_input[ActorInputKeys.startUrls][0][ActorInputKeys.startUrls.method] - assert request.headers.root == example_actor_input[ActorInputKeys.startUrls][0][ActorInputKeys.startUrls.headers] - assert request.payload == example_actor_input[ActorInputKeys.startUrls][0][ActorInputKeys.startUrls.payload].encode( - 'utf-8') + assert request.method == example_start_url_input[ActorInputKeys.startUrls.method] + assert request.headers.root == example_start_url_input[ActorInputKeys.startUrls.headers] + assert request.payload == str(example_start_url_input[ActorInputKeys.startUrls.payload]).encode('utf-8') expected_user_data = UserData() for key, value in example_actor_input[ActorInputKeys.startUrls][0][ActorInputKeys.startUrls.userData].items(): expected_user_data[key] = value @@ -174,7 +175,7 @@ async def test_actor_create_request_list_from_url_additional_inputs() -> None: 'http://www.something.com/somethignelse.txt', 'http://non-english-chars-รกรญรฉรฅรผ.com', 'http://www.port.com:1234', - 'http://username:password@something.apify.com' + 'http://username:password@something.else.com' ]) def test_url_no_commas_regex_true_positives(true_positive: str) -> None: example_string= f'Some text {true_positive} some more text' From 05d048a0f4afcd9a578f5333945a76e7276df39b Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Mon, 18 Nov 2024 18:09:21 +0100 Subject: [PATCH 11/20] Remove types-regex --- poetry.lock | 13 +------------ pyproject.toml | 1 - 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/poetry.lock b/poetry.lock index f43b2f3a..768801be 100644 --- 
a/poetry.lock +++ b/poetry.lock @@ -2823,17 +2823,6 @@ files = [ {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"}, ] -[[package]] -name = "types-regex" -version = "2024.11.6.20241108" -description = "Typing stubs for regex" -optional = false -python-versions = ">=3.8" -files = [ - {file = "types-regex-2024.11.6.20241108.tar.gz", hash = "sha256:a774e307b99b3df49481b29e8b4962f021693052a8d8a2b9e6792fcec896cf5e"}, - {file = "types_regex-2024.11.6.20241108-py3-none-any.whl", hash = "sha256:adec2ff2dfed00855551057334466fde923606599d01e7440556d53a3ef20835"}, -] - [[package]] name = "typing-extensions" version = "4.12.2" @@ -3226,4 +3215,4 @@ scrapy = ["scrapy"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "da388b618b4c9b95567d426529d0b7cda05d33909995c409e595c99e6f1767ff" +content-hash = "3698d5b2d562a7a83489d316a70b6685d4276f9aa9adb904ea5f39479cc8eeee" diff --git a/pyproject.toml b/pyproject.toml index 68091faa..c3a01c41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,6 @@ pytest-xdist = "~3.6.0" respx = "~0.21.0" ruff = "~0.7.0" setuptools = "~75.0.0" # setuptools are used by pytest but not explicitly required -types-regex = "^2024.11.6.20241108" [tool.poetry.extras] scrapy = ["scrapy"] From 376ae8bea6d13f949c46fe5274df556adf559cdf Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Mon, 18 Nov 2024 18:23:37 +0100 Subject: [PATCH 12/20] Make ruff happy --- src/apify/_actor.py | 3 +- src/apify/storages/_actor_inputs.py | 36 +++-- src/apify/storages/_known_actor_input_keys.py | 17 +-- tests/unit/actor/test_actor_inputs.py | 128 +++++++++++------- 4 files changed, 114 insertions(+), 70 deletions(-) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index f60a99df..4f3f032f 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -8,7 +8,6 @@ from lazy_object_proxy import Proxy from pydantic import AliasChoices -from typing_extensions import Self from apify_client import ApifyClientAsync from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars @@ -31,6 +30,8 @@ import logging from types import TracebackType + from typing_extensions import Self + from crawlee.proxy_configuration import _NewUrlFunction from apify._models import Webhook diff --git a/src/apify/storages/_actor_inputs.py b/src/apify/storages/_actor_inputs.py index 9437a578..524ac70b 100644 --- a/src/apify/storages/_actor_inputs.py +++ b/src/apify/storages/_actor_inputs.py @@ -23,12 +23,12 @@ ) - class _RequestDetails(BaseModel): method: HttpMethod payload: str = '' headers: dict[str, str] = Field(default_factory=dict) - user_data: dict[str, str]= Field(default_factory=dict, alias=ActorInputKeys.startUrls.userData) + user_data: dict[str, str] = Field(default_factory=dict, alias=ActorInputKeys.startUrls.userData) + class _RequestsFromUrlInput(_RequestDetails): requests_from_url: str = Field(alias=ActorInputKeys.startUrls.requestsFromUrl) @@ -37,6 +37,7 @@ class _RequestsFromUrlInput(_RequestDetails): class _SimpleUrlInput(_RequestDetails): url: str + class Input(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) start_urls: RequestList @@ -45,11 +46,13 @@ class Input(BaseModel): async def read(cls, raw_input: dict[str, Any], http_client: BaseHttpClient | None = None) -> Self: if ActorInputKeys.startUrls in raw_input: request_list = await _create_request_list( - actor_start_urls_input=raw_input[ActorInputKeys.startUrls], http_client=http_client) + 
actor_start_urls_input=raw_input[ActorInputKeys.startUrls], http_client=http_client + ) else: request_list = RequestList() return cls(start_urls=request_list) + async def _create_request_list( *, actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None ) -> RequestList: @@ -69,10 +72,13 @@ async def _create_request_list( if not http_client: http_client = HttpxHttpClient() simple_url_requests_inputs = [ - _SimpleUrlInput(**request_input) for request_input in actor_start_urls_input - if ActorInputKeys.startUrls.url in request_input] + _SimpleUrlInput(**request_input) + for request_input in actor_start_urls_input + if ActorInputKeys.startUrls.url in request_input + ] remote_url_requests_inputs = [ - _RequestsFromUrlInput(**request_input) for request_input in actor_start_urls_input + _RequestsFromUrlInput(**request_input) + for request_input in actor_start_urls_input if ActorInputKeys.startUrls.requestsFromUrl in request_input ] @@ -109,12 +115,18 @@ async def _create_requests_from_url( def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None: """Callback to scrape response body with regexp and create Requests from matches.""" matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8')) - created_requests.extend([Request.from_url( - match.group(0), - method=request_input.method, - payload=request_input.payload.encode('utf-8'), - headers=request_input.headers, - user_data=request_input.user_data) for match in matches]) + created_requests.extend( + [ + Request.from_url( + match.group(0), + method=request_input.method, + payload=request_input.payload.encode('utf-8'), + headers=request_input.headers, + user_data=request_input.user_data, + ) + for match in matches + ] + ) remote_url_requests = [] for remote_url_requests_input in remote_url_requests_inputs: diff --git a/src/apify/storages/_known_actor_input_keys.py b/src/apify/storages/_known_actor_input_keys.py index 49347393..2283a056 100644 --- a/src/apify/storages/_known_actor_input_keys.py +++ b/src/apify/storages/_known_actor_input_keys.py @@ -1,23 +1,23 @@ - - class _KnownInputKey(str): __slots__ = ('_name',) + def __init__(self, name: str) -> None: self._name = name def __str__(self) -> str: return self._name - def __repr__(self) ->str: + def __repr__(self) -> str: return str(self) + class _StartUrls(_KnownInputKey): - url='url' + url = 'url' requestsFromUrl = 'requestsFromUrl' # noqa: N815 # Intentional to respect actual naming of input keys. - method='method' - payload= 'payload' - userData='userData' # noqa: N815 # Intentional to respect actual naming of input keys. - headers='headers' + method = 'method' + payload = 'payload' + userData = 'userData' # noqa: N815 # Intentional to respect actual naming of input keys. + headers = 'headers' class _ActorInputKeys: @@ -25,4 +25,5 @@ class _ActorInputKeys: startUrls: _StartUrls = _StartUrls('startUrls') # noqa: N815 # Intentional to respect actual naming of input keys. 
# More inputs should be gradually added + ActorInputKeys = _ActorInputKeys() diff --git a/tests/unit/actor/test_actor_inputs.py b/tests/unit/actor/test_actor_inputs.py index 08a3f155..736a1137 100644 --- a/tests/unit/actor/test_actor_inputs.py +++ b/tests/unit/actor/test_actor_inputs.py @@ -19,8 +19,11 @@ 'optional_input', [ {}, - {ActorInputKeys.startUrls.payload: 'some payload', ActorInputKeys.startUrls.userData: - {'some key': 'some value'}, ActorInputKeys.startUrls.headers: {'h1': 'v1', 'h2': 'v2'}}, + { + ActorInputKeys.startUrls.payload: 'some payload', + ActorInputKeys.startUrls.userData: {'some key': 'some value'}, + ActorInputKeys.startUrls.headers: {'h1': 'v1', 'h2': 'v2'}, + }, ], ids=['minimal', 'all_options'], ) @@ -28,8 +31,10 @@ async def test_actor_create_request_list_request_types( request_method: HttpMethod, optional_input: dict[str, Any] ) -> None: """Test proper request list generation from both minimal and full inputs for all method types for simple input.""" - minimal_request_dict_input = {ActorInputKeys.startUrls.url: 'https://www.abc.com', - ActorInputKeys.startUrls.method: request_method} + minimal_request_dict_input = { + ActorInputKeys.startUrls.url: 'https://www.abc.com', + ActorInputKeys.startUrls.method: request_method, + } request_dict_input = {**minimal_request_dict_input, **optional_input} example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [request_dict_input]} @@ -75,17 +80,25 @@ def read(self) -> bytes: async def test_actor_create_request_list_from_url_correctly_send_requests() -> None: """Test that injected HttpClient's method send_request is called with properly passed arguments.""" - example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [ - {ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', ActorInputKeys.startUrls.method: 'GET'}, - {ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', ActorInputKeys.startUrls.method: 'PUT'}, - { - ActorInputKeys.startUrls.requestsFromUrl: 'https://www.something.som', - ActorInputKeys.startUrls.method: 'POST', - ActorInputKeys.startUrls.headers: {'key': 'value'}, - ActorInputKeys.startUrls.payload: 'some_payload', - ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}, - }, - ]} + example_actor_input: dict[str, Any] = { + ActorInputKeys.startUrls: [ + { + ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', + ActorInputKeys.startUrls.method: 'GET', + }, + { + ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', + ActorInputKeys.startUrls.method: 'PUT', + }, + { + ActorInputKeys.startUrls.requestsFromUrl: 'https://www.something.som', + ActorInputKeys.startUrls.method: 'POST', + ActorInputKeys.startUrls.headers: {'key': 'value'}, + ActorInputKeys.startUrls.payload: 'some_payload', + ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}, + }, + ] + } mocked_read_outputs = ('' for url in example_actor_input[ActorInputKeys.startUrls]) http_client = HttpxHttpClient() @@ -117,11 +130,19 @@ async def test_actor_create_request_list_from_url() -> None: ) ) - example_actor_input:dict[str, Any] = {ActorInputKeys.startUrls:[ - {ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', ActorInputKeys.startUrls.method: 'GET'}, - {ActorInputKeys.startUrls.url: expected_simple_url, ActorInputKeys.startUrls.method: 'GET'}, - {ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', ActorInputKeys.startUrls.method: 'GET'}, - ]} + example_actor_input: dict[str, Any] = { + 
ActorInputKeys.startUrls: [ + { + ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', + ActorInputKeys.startUrls.method: 'GET', + }, + {ActorInputKeys.startUrls.url: expected_simple_url, ActorInputKeys.startUrls.method: 'GET'}, + { + ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', + ActorInputKeys.startUrls.method: 'GET', + }, + ] + } http_client = HttpxHttpClient() with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): @@ -133,7 +154,8 @@ async def test_actor_create_request_list_from_url() -> None: # Check correctly created requests' urls in request list assert {generated_request.url for generated_request in generated_requests} == expected_urls -async def test_actor_create_request_list_from_url_additional_inputs() -> None: + +async def test_actor_create_request_list_from_url_additional_inputs() -> None: """Test that all generated request properties are correctly populated from input values.""" expected_simple_url = 'https://www.someurl.com' example_start_url_input = { @@ -141,8 +163,9 @@ async def test_actor_create_request_list_from_url_additional_inputs() -> None: ActorInputKeys.startUrls.method: 'POST', ActorInputKeys.startUrls.headers: {'key': 'value'}, ActorInputKeys.startUrls.payload: 'some_payload', - ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}} - example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls:[example_start_url_input]} + ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}, + } + example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [example_start_url_input]} response_bodies = iter((expected_simple_url,)) http_client = HttpxHttpClient() with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): @@ -162,43 +185,50 @@ async def test_actor_create_request_list_from_url_additional_inputs() -> None: assert request.user_data == expected_user_data -@pytest.mark.parametrize('true_positive', [ - 'http://www.something.com', - 'https://www.something.net', - 'http://nowww.cz', - 'https://with-hypen.com', - 'http://number1.com', - 'http://www.number.123.abc', - 'http://many.dots.com', - 'http://a.com', - 'http://www.something.com/somethignelse' - 'http://www.something.com/somethignelse.txt', - 'http://non-english-chars-รกรญรฉรฅรผ.com', - 'http://www.port.com:1234', - 'http://username:password@something.else.com' -]) +@pytest.mark.parametrize( + 'true_positive', + [ + 'http://www.something.com', + 'https://www.something.net', + 'http://nowww.cz', + 'https://with-hypen.com', + 'http://number1.com', + 'http://www.number.123.abc', + 'http://many.dots.com', + 'http://a.com', + 'http://www.something.com/somethignelse' 'http://www.something.com/somethignelse.txt', + 'http://non-english-chars-รกรญรฉรฅรผ.com', + 'http://www.port.com:1234', + 'http://username:password@something.else.com', + ], +) def test_url_no_commas_regex_true_positives(true_positive: str) -> None: - example_string= f'Some text {true_positive} some more text' + example_string = f'Some text {true_positive} some more text' matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string)) assert len(matches) == 1 assert matches[0].group(0) == true_positive -@pytest.mark.parametrize('false_positive',[ - 'http://www.a', - 'http://a', - 'http://a.a', - 'http://123.456', - 'www.something.com', - 'http:www.something.com', -]) + +@pytest.mark.parametrize( + 'false_positive', + [ + 'http://www.a', + 'http://a', + 'http://a.a', + 
'http://123.456', + 'www.something.com', + 'http:www.something.com', + ], +) def test_url_no_commas_regex_false_positives(false_positive: str) -> None: - example_string= f'Some text {false_positive} some more text' + example_string = f'Some text {false_positive} some more text' matches = list(re.findall(URL_NO_COMMAS_REGEX, example_string)) assert len(matches) == 0 + def test_url_no_commas_regex_multi_line() -> None: true_positives = ('http://www.something.com', 'http://www.else.com') - example_string= 'Some text {} some more text \n Some new line text {} ...'.format(*true_positives) + example_string = 'Some text {} some more text \n Some new line text {} ...'.format(*true_positives) matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string)) assert len(matches) == 2 assert {match.group(0) for match in matches} == set(true_positives) From 629939ef904c1e85976905812e213bca8e0ec264 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Tue, 19 Nov 2024 08:45:44 +0100 Subject: [PATCH 13/20] Remove Input class It had too many assumptions that users might not be interested in. Users should create such Input helper classes based on their specific inputs and their names. --- src/apify/storages/__init__.py | 4 +- src/apify/storages/_actor_inputs.py | 28 ++----- tests/unit/actor/test_actor_inputs.py | 105 ++++++++++++-------------- 3 files changed, 56 insertions(+), 81 deletions(-) diff --git a/src/apify/storages/__init__.py b/src/apify/storages/__init__.py index 8fd33ba3..1c77e7b3 100644 --- a/src/apify/storages/__init__.py +++ b/src/apify/storages/__init__.py @@ -1,5 +1,5 @@ from crawlee.storages import Dataset, KeyValueStore, RequestQueue -from ._actor_inputs import Input +from ._actor_inputs import create_request_list -__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue', 'Input'] +__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue', 'create_request_list'] diff --git a/src/apify/storages/_actor_inputs.py b/src/apify/storages/_actor_inputs.py index 524ac70b..675e7fb2 100644 --- a/src/apify/storages/_actor_inputs.py +++ b/src/apify/storages/_actor_inputs.py @@ -4,12 +4,9 @@ import re from asyncio import Task from functools import partial -from typing import TYPE_CHECKING, Any +from typing import Any -from pydantic import BaseModel, ConfigDict, Field - -if TYPE_CHECKING: - from typing_extensions import Self +from pydantic import BaseModel, Field from crawlee import Request from crawlee._types import HttpMethod @@ -24,7 +21,7 @@ class _RequestDetails(BaseModel): - method: HttpMethod + method: HttpMethod = 'GET' payload: str = '' headers: dict[str, str] = Field(default_factory=dict) user_data: dict[str, str] = Field(default_factory=dict, alias=ActorInputKeys.startUrls.userData) @@ -38,23 +35,8 @@ class _SimpleUrlInput(_RequestDetails): url: str -class Input(BaseModel): - model_config = ConfigDict(arbitrary_types_allowed=True) - start_urls: RequestList - - @classmethod - async def read(cls, raw_input: dict[str, Any], http_client: BaseHttpClient | None = None) -> Self: - if ActorInputKeys.startUrls in raw_input: - request_list = await _create_request_list( - actor_start_urls_input=raw_input[ActorInputKeys.startUrls], http_client=http_client - ) - else: - request_list = RequestList() - return cls(start_urls=request_list) - - -async def _create_request_list( - *, actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None +async def create_request_list( + actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None ) -> RequestList: """Creates 
RequestList from Actor input requestListSources. diff --git a/tests/unit/actor/test_actor_inputs.py b/tests/unit/actor/test_actor_inputs.py index 736a1137..91c253f0 100644 --- a/tests/unit/actor/test_actor_inputs.py +++ b/tests/unit/actor/test_actor_inputs.py @@ -11,7 +11,7 @@ from crawlee._types import HttpHeaders, HttpMethod from crawlee.http_clients import HttpResponse, HttpxHttpClient -from apify.storages._actor_inputs import URL_NO_COMMAS_REGEX, ActorInputKeys, Input +from apify.storages._actor_inputs import URL_NO_COMMAS_REGEX, ActorInputKeys, create_request_list @pytest.mark.parametrize('request_method', get_args(HttpMethod)) @@ -36,24 +36,22 @@ async def test_actor_create_request_list_request_types( ActorInputKeys.startUrls.method: request_method, } request_dict_input = {**minimal_request_dict_input, **optional_input} - example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [request_dict_input]} - generated_input = await Input.read(example_actor_input) + request_list = await create_request_list([request_dict_input]) + assert not await request_list.is_empty() + request = await request_list.fetch_next_request() + assert request is not None + assert await request_list.is_empty() - assert not await generated_input.start_urls.is_empty() - generated_request = await generated_input.start_urls.fetch_next_request() - assert generated_request is not None - assert await generated_input.start_urls.is_empty() - - assert generated_request.method == request_dict_input[ActorInputKeys.startUrls.method] - assert generated_request.url == request_dict_input[ActorInputKeys.startUrls.url] - assert generated_request.payload == request_dict_input.get(ActorInputKeys.startUrls.payload, '').encode('utf-8') + assert request.method == request_dict_input[ActorInputKeys.startUrls.method] + assert request.url == request_dict_input[ActorInputKeys.startUrls.url] + assert request.payload == request_dict_input.get(ActorInputKeys.startUrls.payload, '').encode('utf-8') expected_user_data = UserData() if ActorInputKeys.startUrls.userData in optional_input: for key, value in optional_input[ActorInputKeys.startUrls.userData].items(): expected_user_data[key] = value - assert generated_request.user_data == expected_user_data - assert generated_request.headers.root == optional_input.get(ActorInputKeys.startUrls.headers, {}) + assert request.user_data == expected_user_data + assert request.headers.root == optional_input.get(ActorInputKeys.startUrls.headers, {}) def _create_dummy_response(read_output: Iterator[str]) -> HttpResponse: @@ -80,39 +78,37 @@ def read(self) -> bytes: async def test_actor_create_request_list_from_url_correctly_send_requests() -> None: """Test that injected HttpClient's method send_request is called with properly passed arguments.""" - example_actor_input: dict[str, Any] = { - ActorInputKeys.startUrls: [ - { - ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', - ActorInputKeys.startUrls.method: 'GET', - }, - { - ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', - ActorInputKeys.startUrls.method: 'PUT', - }, - { - ActorInputKeys.startUrls.requestsFromUrl: 'https://www.something.som', - ActorInputKeys.startUrls.method: 'POST', - ActorInputKeys.startUrls.headers: {'key': 'value'}, - ActorInputKeys.startUrls.payload: 'some_payload', - ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}, - }, - ] - } + actor_start_urls_input: list[dict[str, Any]] = [ + { + ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', + 
ActorInputKeys.startUrls.method: 'GET', + }, + { + ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', + ActorInputKeys.startUrls.method: 'PUT', + }, + { + ActorInputKeys.startUrls.requestsFromUrl: 'https://www.something.som', + ActorInputKeys.startUrls.method: 'POST', + ActorInputKeys.startUrls.headers: {'key': 'value'}, + ActorInputKeys.startUrls.payload: 'some_payload', + ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}, + }, + ] - mocked_read_outputs = ('' for url in example_actor_input[ActorInputKeys.startUrls]) + mocked_read_outputs = ('' for url in actor_start_urls_input) http_client = HttpxHttpClient() with mock.patch.object( http_client, 'send_request', return_value=_create_dummy_response(mocked_read_outputs) ) as mocked_send_request: - await Input.read(example_actor_input, http_client=http_client) + await create_request_list(actor_start_urls_input, http_client=http_client) expected_calls = [ call( method='GET', url=example_input[ActorInputKeys.startUrls.requestsFromUrl], ) - for example_input in example_actor_input[ActorInputKeys.startUrls] + for example_input in actor_start_urls_input ] mocked_send_request.assert_has_calls(expected_calls) @@ -130,25 +126,23 @@ async def test_actor_create_request_list_from_url() -> None: ) ) - example_actor_input: dict[str, Any] = { - ActorInputKeys.startUrls: [ - { - ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', - ActorInputKeys.startUrls.method: 'GET', - }, - {ActorInputKeys.startUrls.url: expected_simple_url, ActorInputKeys.startUrls.method: 'GET'}, - { - ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', - ActorInputKeys.startUrls.method: 'GET', - }, - ] - } + actor_start_urls_input = [ + { + ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', + ActorInputKeys.startUrls.method: 'GET', + }, + {ActorInputKeys.startUrls.url: expected_simple_url, ActorInputKeys.startUrls.method: 'GET'}, + { + ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', + ActorInputKeys.startUrls.method: 'GET', + }, + ] http_client = HttpxHttpClient() with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): - generated_input = await Input.read(example_actor_input, http_client=http_client) + request_list = await create_request_list(actor_start_urls_input, http_client=http_client) generated_requests = [] - while request := await generated_input.start_urls.fetch_next_request(): + while request := await request_list.fetch_next_request(): generated_requests.append(request) # Check correctly created requests' urls in request list @@ -158,29 +152,28 @@ async def test_actor_create_request_list_from_url() -> None: async def test_actor_create_request_list_from_url_additional_inputs() -> None: """Test that all generated request properties are correctly populated from input values.""" expected_simple_url = 'https://www.someurl.com' - example_start_url_input = { + example_start_url_input: dict[str, Any] = { ActorInputKeys.startUrls.requestsFromUrl: 'https://crawlee.dev/file.txt', ActorInputKeys.startUrls.method: 'POST', ActorInputKeys.startUrls.headers: {'key': 'value'}, ActorInputKeys.startUrls.payload: 'some_payload', ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}, } - example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [example_start_url_input]} + response_bodies = iter((expected_simple_url,)) http_client = HttpxHttpClient() with mock.patch.object(http_client, 'send_request', 
return_value=_create_dummy_response(response_bodies)): - generated_input = await Input.read(example_actor_input, http_client=http_client) - request = await generated_input.start_urls.fetch_next_request() + request_list = await create_request_list([example_start_url_input], http_client=http_client) + request = await request_list.fetch_next_request() # Check all properties correctly created for request - example_start_url_input = example_actor_input[ActorInputKeys.startUrls][0] assert request assert request.url == expected_simple_url assert request.method == example_start_url_input[ActorInputKeys.startUrls.method] assert request.headers.root == example_start_url_input[ActorInputKeys.startUrls.headers] assert request.payload == str(example_start_url_input[ActorInputKeys.startUrls.payload]).encode('utf-8') expected_user_data = UserData() - for key, value in example_actor_input[ActorInputKeys.startUrls][0][ActorInputKeys.startUrls.userData].items(): + for key, value in example_start_url_input[ActorInputKeys.startUrls.userData].items(): expected_user_data[key] = value assert request.user_data == expected_user_data From 910d11f142c959597f9c1bac29b35e03e32bb700 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Tue, 19 Nov 2024 13:43:11 +0100 Subject: [PATCH 14/20] Review comments. Update TCH001, TCH002, TCH003 uses. --- pyproject.toml | 2 +- src/apify/_configuration.py | 1 - src/apify/_models.py | 1 - src/apify/scrapy/middlewares/apify_proxy.py | 6 +- .../scrapy/pipelines/actor_dataset_push.py | 5 +- src/apify/scrapy/scheduler.py | 5 +- src/apify/scrapy/utils.py | 5 +- src/apify/storages/__init__.py | 4 +- src/apify/storages/_known_actor_input_keys.py | 29 ------ .../{_actor_inputs.py => request_list.py} | 81 +++++++++------- ...t_actor_inputs.py => test_request_list.py} | 96 ++++++++++--------- 11 files changed, 115 insertions(+), 120 deletions(-) delete mode 100644 src/apify/storages/_known_actor_input_keys.py rename src/apify/storages/{_actor_inputs.py => request_list.py} (61%) rename tests/unit/actor/{test_actor_inputs.py => test_request_list.py} (63%) diff --git a/pyproject.toml b/pyproject.toml index c3a01c41..7647dfa0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -142,7 +142,7 @@ docstring-quotes = "double" inline-quotes = "single" [tool.ruff.lint.flake8-type-checking] -runtime-evaluated-base-classes = ["pydantic.BaseModel"] +runtime-evaluated-base-classes = ["pydantic.BaseModel", "crawlee.configuration.Configuration", "ApifyHttpProxyMiddleware"] [tool.ruff.lint.flake8-builtins] builtins-ignorelist = ["id"] diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index ab249284..00bb0336 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -1,4 +1,3 @@ -# ruff: noqa: TCH001 TCH002 TCH003 (so that pydantic annotations work) from __future__ import annotations from datetime import datetime, timedelta diff --git a/src/apify/_models.py b/src/apify/_models.py index 5963ec9a..f9b2f9a8 100644 --- a/src/apify/_models.py +++ b/src/apify/_models.py @@ -1,4 +1,3 @@ -# ruff: noqa: TCH001 TCH002 TCH003 (Pydantic) from __future__ import annotations from datetime import datetime, timedelta diff --git a/src/apify/scrapy/middlewares/apify_proxy.py b/src/apify/scrapy/middlewares/apify_proxy.py index 3a7f7b75..b1dc2b88 100644 --- a/src/apify/scrapy/middlewares/apify_proxy.py +++ b/src/apify/scrapy/middlewares/apify_proxy.py @@ -1,11 +1,13 @@ from __future__ import annotations +from typing import TYPE_CHECKING from urllib.parse import ParseResult, urlparse try: - 
from scrapy import Request, Spider # noqa: TCH002 + if TYPE_CHECKING: + from scrapy import Request, Spider + from scrapy.crawler import Crawler from scrapy.core.downloader.handlers.http11 import TunnelError - from scrapy.crawler import Crawler # noqa: TCH002 from scrapy.exceptions import NotConfigured except ImportError as exc: raise ImportError( diff --git a/src/apify/scrapy/pipelines/actor_dataset_push.py b/src/apify/scrapy/pipelines/actor_dataset_push.py index 15026475..d2d983cc 100644 --- a/src/apify/scrapy/pipelines/actor_dataset_push.py +++ b/src/apify/scrapy/pipelines/actor_dataset_push.py @@ -1,9 +1,12 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from itemadapter.adapter import ItemAdapter try: - from scrapy import Item, Spider # noqa: TCH002 + if TYPE_CHECKING: + from scrapy import Item, Spider except ImportError as exc: raise ImportError( 'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".', diff --git a/src/apify/scrapy/scheduler.py b/src/apify/scrapy/scheduler.py index 849e5376..da79ac64 100644 --- a/src/apify/scrapy/scheduler.py +++ b/src/apify/scrapy/scheduler.py @@ -1,6 +1,7 @@ from __future__ import annotations import traceback +from typing import TYPE_CHECKING from apify._configuration import Configuration from apify.apify_storage_client import ApifyStorageClient @@ -8,8 +9,10 @@ try: from scrapy import Spider from scrapy.core.scheduler import BaseScheduler - from scrapy.http.request import Request # noqa: TCH002 from scrapy.utils.reactor import is_asyncio_reactor_installed + + if TYPE_CHECKING: + from scrapy.http.request import Request except ImportError as exc: raise ImportError( 'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".', diff --git a/src/apify/scrapy/utils.py b/src/apify/scrapy/utils.py index f22a60de..1f92d4ff 100644 --- a/src/apify/scrapy/utils.py +++ b/src/apify/scrapy/utils.py @@ -2,14 +2,17 @@ import asyncio from base64 import b64encode +from typing import TYPE_CHECKING from urllib.parse import unquote from apify_shared.utils import ignore_docs try: - from scrapy.settings import Settings # noqa: TCH002 from scrapy.utils.project import get_project_settings from scrapy.utils.python import to_bytes + + if TYPE_CHECKING: + from scrapy.settings import Settings except ImportError as exc: raise ImportError( 'To use this module, you need to install the "scrapy" extra. 
For example, if you use pip, run ' diff --git a/src/apify/storages/__init__.py b/src/apify/storages/__init__.py index 1c77e7b3..fc812aa1 100644 --- a/src/apify/storages/__init__.py +++ b/src/apify/storages/__init__.py @@ -1,5 +1,5 @@ from crawlee.storages import Dataset, KeyValueStore, RequestQueue -from ._actor_inputs import create_request_list +from .request_list import RequestList -__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue', 'create_request_list'] +__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue', 'RequestList'] diff --git a/src/apify/storages/_known_actor_input_keys.py b/src/apify/storages/_known_actor_input_keys.py deleted file mode 100644 index 2283a056..00000000 --- a/src/apify/storages/_known_actor_input_keys.py +++ /dev/null @@ -1,29 +0,0 @@ -class _KnownInputKey(str): - __slots__ = ('_name',) - - def __init__(self, name: str) -> None: - self._name = name - - def __str__(self) -> str: - return self._name - - def __repr__(self) -> str: - return str(self) - - -class _StartUrls(_KnownInputKey): - url = 'url' - requestsFromUrl = 'requestsFromUrl' # noqa: N815 # Intentional to respect actual naming of input keys. - method = 'method' - payload = 'payload' - userData = 'userData' # noqa: N815 # Intentional to respect actual naming of input keys. - headers = 'headers' - - -class _ActorInputKeys: - # Helper class to have actor input strings all in one place and easy to use with code completion. - startUrls: _StartUrls = _StartUrls('startUrls') # noqa: N815 # Intentional to respect actual naming of input keys. - # More inputs should be gradually added - - -ActorInputKeys = _ActorInputKeys() diff --git a/src/apify/storages/_actor_inputs.py b/src/apify/storages/request_list.py similarity index 61% rename from src/apify/storages/_actor_inputs.py rename to src/apify/storages/request_list.py index 675e7fb2..a3e11006 100644 --- a/src/apify/storages/_actor_inputs.py +++ b/src/apify/storages/request_list.py @@ -4,16 +4,14 @@ import re from asyncio import Task from functools import partial -from typing import Any +from typing import Any, Union -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, TypeAdapter from crawlee import Request from crawlee._types import HttpMethod from crawlee.http_clients import BaseHttpClient, HttpxHttpClient -from crawlee.storages import RequestList - -from ._known_actor_input_keys import ActorInputKeys +from crawlee.storages import RequestList as CrawleeRequestList URL_NO_COMMAS_REGEX = re.compile( r'https?:\/\/(www\.)?([^\W_]|[^\W_][-\w0-9@:%._+~#=]{0,254}[^\W_])\.[a-z]{2,63}(:\d{1,5})?(\/[-\w@:%+.~#?&/=()]*)?' 
@@ -24,50 +22,63 @@ class _RequestDetails(BaseModel): method: HttpMethod = 'GET' payload: str = '' headers: dict[str, str] = Field(default_factory=dict) - user_data: dict[str, str] = Field(default_factory=dict, alias=ActorInputKeys.startUrls.userData) + user_data: dict[str, str] = Field(default_factory=dict, alias='userData') class _RequestsFromUrlInput(_RequestDetails): - requests_from_url: str = Field(alias=ActorInputKeys.startUrls.requestsFromUrl) + requests_from_url: str = Field(alias='requestsFromUrl') class _SimpleUrlInput(_RequestDetails): url: str -async def create_request_list( - actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None +url_input_adapter = TypeAdapter(list[Union[_RequestsFromUrlInput, _SimpleUrlInput]]) + + +class RequestList(CrawleeRequestList): + """Extends crawlee RequestList.""" + + @classmethod + async def open( + cls, + name: str | None = None, + actor_start_urls_input: list[dict[str, Any]] | None = None, + http_client: BaseHttpClient | None = None, + ) -> RequestList: + """Creates RequestList from Actor input requestListSources. + + name is name of the returned RequestList + actor_start_urls_input can contain list dicts with either url or requestsFromUrl key + http_client is client that will be used to send get request to url defined in requestsFromUrl + + Example actor_start_urls_input: + [ + # Gather urls from response body. + {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, + # Directly include this url. + {'url': 'https://crawlee.dev', 'method': 'GET'} + ] + """ + actor_start_urls_input = actor_start_urls_input or [] + return await _create_request_list(name, actor_start_urls_input, http_client) + + +async def _create_request_list( + name: str | None, actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None ) -> RequestList: - """Creates RequestList from Actor input requestListSources. - - actor_start_urls_input can contain list dicts with either url or requestsFromUrl key - http_client is client that will be used to send get request to url defined in requestsFromUrl - - Example: - actor_start_urls_input = [ - # Gather urls from response body. - {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, - # Directly include this url. 
- {'url': 'https://crawlee.dev', 'method': 'GET'} - ] - """ if not http_client: http_client = HttpxHttpClient() - simple_url_requests_inputs = [ - _SimpleUrlInput(**request_input) - for request_input in actor_start_urls_input - if ActorInputKeys.startUrls.url in request_input - ] - remote_url_requests_inputs = [ - _RequestsFromUrlInput(**request_input) - for request_input in actor_start_urls_input - if ActorInputKeys.startUrls.requestsFromUrl in request_input - ] - simple_url_requests = _create_requests_from_input(simple_url_requests_inputs) - remote_url_requests = await _create_requests_from_url(remote_url_requests_inputs, http_client=http_client) + ulr_inputs = url_input_adapter.validate_python(actor_start_urls_input) # instance of list[Union[...]] + + simple_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _SimpleUrlInput] + remote_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _RequestsFromUrlInput] + + simple_url_requests = _create_requests_from_input(simple_url_inputs) + remote_url_requests = await _create_requests_from_url(remote_url_inputs, http_client=http_client) - return RequestList(requests=simple_url_requests + remote_url_requests) + return RequestList(name=name, requests=simple_url_requests + remote_url_requests) def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> list[Request]: diff --git a/tests/unit/actor/test_actor_inputs.py b/tests/unit/actor/test_request_list.py similarity index 63% rename from tests/unit/actor/test_actor_inputs.py rename to tests/unit/actor/test_request_list.py index 91c253f0..e3bbfb36 100644 --- a/tests/unit/actor/test_actor_inputs.py +++ b/tests/unit/actor/test_request_list.py @@ -11,7 +11,7 @@ from crawlee._types import HttpHeaders, HttpMethod from crawlee.http_clients import HttpResponse, HttpxHttpClient -from apify.storages._actor_inputs import URL_NO_COMMAS_REGEX, ActorInputKeys, create_request_list +from apify.storages.request_list import URL_NO_COMMAS_REGEX, RequestList @pytest.mark.parametrize('request_method', get_args(HttpMethod)) @@ -20,38 +20,36 @@ [ {}, { - ActorInputKeys.startUrls.payload: 'some payload', - ActorInputKeys.startUrls.userData: {'some key': 'some value'}, - ActorInputKeys.startUrls.headers: {'h1': 'v1', 'h2': 'v2'}, + 'payload': 'some payload', + 'userData': {'some key': 'some value'}, + 'headers': {'h1': 'v1', 'h2': 'v2'}, }, ], ids=['minimal', 'all_options'], ) -async def test_actor_create_request_list_request_types( - request_method: HttpMethod, optional_input: dict[str, Any] -) -> None: +async def test_request_list_open_request_types(request_method: HttpMethod, optional_input: dict[str, Any]) -> None: """Test proper request list generation from both minimal and full inputs for all method types for simple input.""" minimal_request_dict_input = { - ActorInputKeys.startUrls.url: 'https://www.abc.com', - ActorInputKeys.startUrls.method: request_method, + 'url': 'https://www.abc.com', + 'method': request_method, } request_dict_input = {**minimal_request_dict_input, **optional_input} - request_list = await create_request_list([request_dict_input]) + request_list = await RequestList.open(actor_start_urls_input=[request_dict_input]) assert not await request_list.is_empty() request = await request_list.fetch_next_request() assert request is not None assert await request_list.is_empty() - assert request.method == request_dict_input[ActorInputKeys.startUrls.method] - assert request.url == request_dict_input[ActorInputKeys.startUrls.url] - assert 
request.payload == request_dict_input.get(ActorInputKeys.startUrls.payload, '').encode('utf-8') + assert request.method == request_dict_input['method'] + assert request.url == request_dict_input['url'] + assert request.payload == request_dict_input.get('payload', '').encode('utf-8') expected_user_data = UserData() - if ActorInputKeys.startUrls.userData in optional_input: - for key, value in optional_input[ActorInputKeys.startUrls.userData].items(): + if 'userData' in optional_input: + for key, value in optional_input['userData'].items(): expected_user_data[key] = value assert request.user_data == expected_user_data - assert request.headers.root == optional_input.get(ActorInputKeys.startUrls.headers, {}) + assert request.headers.root == optional_input.get('headers', {}) def _create_dummy_response(read_output: Iterator[str]) -> HttpResponse: @@ -76,23 +74,23 @@ def read(self) -> bytes: return DummyResponse() -async def test_actor_create_request_list_from_url_correctly_send_requests() -> None: +async def test__request_list_open_from_url_correctly_send_requests() -> None: """Test that injected HttpClient's method send_request is called with properly passed arguments.""" actor_start_urls_input: list[dict[str, Any]] = [ { - ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', - ActorInputKeys.startUrls.method: 'GET', + 'requestsFromUrl': 'https://abc.dev/file.txt', + 'method': 'GET', }, { - ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', - ActorInputKeys.startUrls.method: 'PUT', + 'requestsFromUrl': 'https://www.abc.dev/file2', + 'method': 'PUT', }, { - ActorInputKeys.startUrls.requestsFromUrl: 'https://www.something.som', - ActorInputKeys.startUrls.method: 'POST', - ActorInputKeys.startUrls.headers: {'key': 'value'}, - ActorInputKeys.startUrls.payload: 'some_payload', - ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}, + 'requestsFromUrl': 'https://www.something.som', + 'method': 'POST', + 'headers': {'key': 'value'}, + 'payload': 'some_payload', + 'userData': {'another_key': 'another_value'}, }, ] @@ -101,19 +99,19 @@ async def test_actor_create_request_list_from_url_correctly_send_requests() -> N with mock.patch.object( http_client, 'send_request', return_value=_create_dummy_response(mocked_read_outputs) ) as mocked_send_request: - await create_request_list(actor_start_urls_input, http_client=http_client) + await RequestList.open(actor_start_urls_input=actor_start_urls_input, http_client=http_client) expected_calls = [ call( method='GET', - url=example_input[ActorInputKeys.startUrls.requestsFromUrl], + url=example_input['requestsFromUrl'], ) for example_input in actor_start_urls_input ] mocked_send_request.assert_has_calls(expected_calls) -async def test_actor_create_request_list_from_url() -> None: +async def test_request_list_open_from_url() -> None: """Test that create_request_list is correctly reading urls from remote url sources and also from simple input.""" expected_simple_url = 'https://www.someurl.com' expected_remote_urls_1 = {'http://www.something.com', 'https://www.somethingelse.com', 'http://www.bla.net'} @@ -128,19 +126,19 @@ async def test_actor_create_request_list_from_url() -> None: actor_start_urls_input = [ { - ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', - ActorInputKeys.startUrls.method: 'GET', + 'requestsFromUrl': 'https://abc.dev/file.txt', + 'method': 'GET', }, - {ActorInputKeys.startUrls.url: expected_simple_url, ActorInputKeys.startUrls.method: 'GET'}, + {'url': expected_simple_url, 
'method': 'GET'}, { - ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', - ActorInputKeys.startUrls.method: 'GET', + 'requestsFromUrl': 'https://www.abc.dev/file2', + 'method': 'GET', }, ] http_client = HttpxHttpClient() with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): - request_list = await create_request_list(actor_start_urls_input, http_client=http_client) + request_list = await RequestList.open(actor_start_urls_input=actor_start_urls_input, http_client=http_client) generated_requests = [] while request := await request_list.fetch_next_request(): generated_requests.append(request) @@ -149,35 +147,41 @@ async def test_actor_create_request_list_from_url() -> None: assert {generated_request.url for generated_request in generated_requests} == expected_urls -async def test_actor_create_request_list_from_url_additional_inputs() -> None: +async def test_request_list_open_from_url_additional_inputs() -> None: """Test that all generated request properties are correctly populated from input values.""" expected_simple_url = 'https://www.someurl.com' example_start_url_input: dict[str, Any] = { - ActorInputKeys.startUrls.requestsFromUrl: 'https://crawlee.dev/file.txt', - ActorInputKeys.startUrls.method: 'POST', - ActorInputKeys.startUrls.headers: {'key': 'value'}, - ActorInputKeys.startUrls.payload: 'some_payload', - ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}, + 'requestsFromUrl': 'https://crawlee.dev/file.txt', + 'method': 'POST', + 'headers': {'key': 'value'}, + 'payload': 'some_payload', + 'userData': {'another_key': 'another_value'}, } response_bodies = iter((expected_simple_url,)) http_client = HttpxHttpClient() with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): - request_list = await create_request_list([example_start_url_input], http_client=http_client) + request_list = await RequestList.open(actor_start_urls_input=[example_start_url_input], http_client=http_client) request = await request_list.fetch_next_request() # Check all properties correctly created for request assert request assert request.url == expected_simple_url - assert request.method == example_start_url_input[ActorInputKeys.startUrls.method] - assert request.headers.root == example_start_url_input[ActorInputKeys.startUrls.headers] - assert request.payload == str(example_start_url_input[ActorInputKeys.startUrls.payload]).encode('utf-8') + assert request.method == example_start_url_input['method'] + assert request.headers.root == example_start_url_input['headers'] + assert request.payload == str(example_start_url_input['payload']).encode('utf-8') expected_user_data = UserData() - for key, value in example_start_url_input[ActorInputKeys.startUrls.userData].items(): + for key, value in example_start_url_input['userData'].items(): expected_user_data[key] = value assert request.user_data == expected_user_data +async def test_request_list_open_name() -> None: + name = 'some_name' + request_list = await RequestList.open(name=name) + assert request_list.name == name + + @pytest.mark.parametrize( 'true_positive', [ From 6ff9e9058afa3522cf3cc425b3249fc5d96dae37 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Tue, 19 Nov 2024 13:45:30 +0100 Subject: [PATCH 15/20] Remove unnecessary pyproject setting value. 
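For context on how the helper is meant to be consumed at this point in the series, a rough usage sketch inside an Actor's main coroutine; `Actor.get_input()` and the `startUrls` input key are assumptions here, only `RequestList.open(actor_start_urls_input=...)` and `fetch_next_request` come from the diffs above:

```python
from apify import Actor
from apify.storages import RequestList


async def main() -> None:
    async with Actor:
        actor_input = await Actor.get_input() or {}
        # 'startUrls' is an assumed input key holding the url / requestsFromUrl entries.
        start_urls = actor_input.get('startUrls', [])
        request_list = await RequestList.open(actor_start_urls_input=start_urls)
        while request := await request_list.fetch_next_request():
            Actor.log.info(f'Loaded request for {request.url}')
```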
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7647dfa0..bda4c509 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -142,7 +142,7 @@ docstring-quotes = "double" inline-quotes = "single" [tool.ruff.lint.flake8-type-checking] -runtime-evaluated-base-classes = ["pydantic.BaseModel", "crawlee.configuration.Configuration", "ApifyHttpProxyMiddleware"] +runtime-evaluated-base-classes = ["pydantic.BaseModel", "crawlee.configuration.Configuration"] [tool.ruff.lint.flake8-builtins] builtins-ignorelist = ["id"] From 3f3314529e7929e823a0540262de45dd6456c1bb Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Tue, 19 Nov 2024 16:25:38 +0100 Subject: [PATCH 16/20] Addresing review comments --- src/apify/storages/__init__.py | 2 +- src/apify/storages/_request_list.py | 140 ++++++++++++++++++++++++++ src/apify/storages/request_list.py | 137 ------------------------- tests/unit/actor/test_request_list.py | 35 ++++--- 4 files changed, 161 insertions(+), 153 deletions(-) create mode 100644 src/apify/storages/_request_list.py delete mode 100644 src/apify/storages/request_list.py diff --git a/src/apify/storages/__init__.py b/src/apify/storages/__init__.py index fc812aa1..63ac7af6 100644 --- a/src/apify/storages/__init__.py +++ b/src/apify/storages/__init__.py @@ -1,5 +1,5 @@ from crawlee.storages import Dataset, KeyValueStore, RequestQueue -from .request_list import RequestList +from ._request_list import RequestList __all__ = ['Dataset', 'KeyValueStore', 'RequestQueue', 'RequestList'] diff --git a/src/apify/storages/_request_list.py b/src/apify/storages/_request_list.py new file mode 100644 index 00000000..c4ca1d9a --- /dev/null +++ b/src/apify/storages/_request_list.py @@ -0,0 +1,140 @@ +from __future__ import annotations + +import asyncio +import re +from asyncio import Task +from functools import partial +from typing import Annotated, Any, Union + +from pydantic import BaseModel, Field, TypeAdapter + +from crawlee import Request +from crawlee._types import HttpMethod +from crawlee.http_clients import BaseHttpClient, HttpxHttpClient +from crawlee.storages import RequestList as CrawleeRequestList + +URL_NO_COMMAS_REGEX = re.compile( + r'https?:\/\/(www\.)?([^\W_]|[^\W_][-\w0-9@:%._+~#=]{0,254}[^\W_])\.[a-z]{2,63}(:\d{1,5})?(\/[-\w@:%+.~#?&/=()]*)?' +) + + +class _RequestDetails(BaseModel): + method: HttpMethod = 'GET' + payload: str = '' + headers: Annotated[dict[str, str], Field(default_factory=dict)] = {} + user_data: Annotated[dict[str, str], Field(default_factory=dict, alias='userData')] = {} + + +class _RequestsFromUrlInput(_RequestDetails): + requests_from_url: str = Field(alias='requestsFromUrl') + + +class _SimpleUrlInput(_RequestDetails): + url: str + + +url_input_adapter = TypeAdapter(list[Union[_RequestsFromUrlInput, _SimpleUrlInput]]) + + +# @docs_group('Classes') # Not yet available in crawlee +class RequestList(CrawleeRequestList): + """Extends crawlee RequestList. + + Method open is used to create RequestList from actor's requestListSources input. + """ + + @staticmethod + async def open( + name: str | None = None, + request_list_sources_input: list[dict[str, Any]] | None = None, + http_client: BaseHttpClient | None = None, + ) -> RequestList: + """Creates RequestList from Actor input requestListSources. 
+ + name is name of the returned RequestList + request_list_sources_input can contain list dicts with either url or requestsFromUrl key + http_client is client that will be used to send get request to url defined in requestsFromUrl + + Example request_list_sources_input: + [ + # Gather urls from response body. + {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, + # Directly include this url. + {'url': 'https://crawlee.dev', 'method': 'GET'} + ] + """ + request_list_sources_input = request_list_sources_input or [] + return await RequestList._create_request_list(name, request_list_sources_input, http_client) + + @staticmethod + async def _create_request_list( + name: str | None, request_list_sources_input: list[dict[str, Any]], http_client: BaseHttpClient | None + ) -> RequestList: + if not http_client: + http_client = HttpxHttpClient() + + ulr_inputs = url_input_adapter.validate_python(request_list_sources_input) # instance of list[Union[...]] + + simple_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _SimpleUrlInput] + remote_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _RequestsFromUrlInput] + + simple_url_requests = RequestList._create_requests_from_input(simple_url_inputs) + remote_url_requests = await RequestList._fetch_requests_from_url(remote_url_inputs, http_client=http_client) + + return RequestList(name=name, requests=simple_url_requests + remote_url_requests) + + @staticmethod + def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> list[Request]: + return [ + Request.from_url( + method=request_input.method, + url=request_input.url, + payload=request_input.payload.encode('utf-8'), + headers=request_input.headers, + user_data=request_input.user_data, + ) + for request_input in simple_url_inputs + ] + + @staticmethod + async def _fetch_requests_from_url( + remote_url_requests_inputs: list[_RequestsFromUrlInput], http_client: BaseHttpClient + ) -> list[Request]: + """Crete list of requests from url. + + Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Run extracting + callback on each response body and use URL_NO_COMMAS_REGEX regex to find all links. Create list of Requests from + collected links and additional inputs stored in other attributes of each remote_url_requests_inputs. 
+ """ + created_requests: list[Request] = [] + + def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None: + """Callback to scrape response body with regexp and create Requests from matches.""" + matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8')) + created_requests.extend( + [ + Request.from_url( + match.group(0), + method=request_input.method, + payload=request_input.payload.encode('utf-8'), + headers=request_input.headers, + user_data=request_input.user_data, + ) + for match in matches + ] + ) + + remote_url_requests = [] + for remote_url_requests_input in remote_url_requests_inputs: + get_response_task = asyncio.create_task( + http_client.send_request( + method='GET', + url=remote_url_requests_input.requests_from_url, + ) + ) + + get_response_task.add_done_callback(partial(create_requests_from_response, remote_url_requests_input)) + remote_url_requests.append(get_response_task) + + await asyncio.gather(*remote_url_requests) + return created_requests diff --git a/src/apify/storages/request_list.py b/src/apify/storages/request_list.py deleted file mode 100644 index a3e11006..00000000 --- a/src/apify/storages/request_list.py +++ /dev/null @@ -1,137 +0,0 @@ -from __future__ import annotations - -import asyncio -import re -from asyncio import Task -from functools import partial -from typing import Any, Union - -from pydantic import BaseModel, Field, TypeAdapter - -from crawlee import Request -from crawlee._types import HttpMethod -from crawlee.http_clients import BaseHttpClient, HttpxHttpClient -from crawlee.storages import RequestList as CrawleeRequestList - -URL_NO_COMMAS_REGEX = re.compile( - r'https?:\/\/(www\.)?([^\W_]|[^\W_][-\w0-9@:%._+~#=]{0,254}[^\W_])\.[a-z]{2,63}(:\d{1,5})?(\/[-\w@:%+.~#?&/=()]*)?' -) - - -class _RequestDetails(BaseModel): - method: HttpMethod = 'GET' - payload: str = '' - headers: dict[str, str] = Field(default_factory=dict) - user_data: dict[str, str] = Field(default_factory=dict, alias='userData') - - -class _RequestsFromUrlInput(_RequestDetails): - requests_from_url: str = Field(alias='requestsFromUrl') - - -class _SimpleUrlInput(_RequestDetails): - url: str - - -url_input_adapter = TypeAdapter(list[Union[_RequestsFromUrlInput, _SimpleUrlInput]]) - - -class RequestList(CrawleeRequestList): - """Extends crawlee RequestList.""" - - @classmethod - async def open( - cls, - name: str | None = None, - actor_start_urls_input: list[dict[str, Any]] | None = None, - http_client: BaseHttpClient | None = None, - ) -> RequestList: - """Creates RequestList from Actor input requestListSources. - - name is name of the returned RequestList - actor_start_urls_input can contain list dicts with either url or requestsFromUrl key - http_client is client that will be used to send get request to url defined in requestsFromUrl - - Example actor_start_urls_input: - [ - # Gather urls from response body. - {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, - # Directly include this url. 
- {'url': 'https://crawlee.dev', 'method': 'GET'} - ] - """ - actor_start_urls_input = actor_start_urls_input or [] - return await _create_request_list(name, actor_start_urls_input, http_client) - - -async def _create_request_list( - name: str | None, actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None -) -> RequestList: - if not http_client: - http_client = HttpxHttpClient() - - ulr_inputs = url_input_adapter.validate_python(actor_start_urls_input) # instance of list[Union[...]] - - simple_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _SimpleUrlInput] - remote_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _RequestsFromUrlInput] - - simple_url_requests = _create_requests_from_input(simple_url_inputs) - remote_url_requests = await _create_requests_from_url(remote_url_inputs, http_client=http_client) - - return RequestList(name=name, requests=simple_url_requests + remote_url_requests) - - -def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> list[Request]: - return [ - Request.from_url( - method=request_input.method, - url=request_input.url, - payload=request_input.payload.encode('utf-8'), - headers=request_input.headers, - user_data=request_input.user_data, - ) - for request_input in simple_url_inputs - ] - - -async def _create_requests_from_url( - remote_url_requests_inputs: list[_RequestsFromUrlInput], http_client: BaseHttpClient -) -> list[Request]: - """Crete list of requests from url. - - Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Run extracting - callback on each response body and use URL_NO_COMMAS_REGEX regex to find all links. Create list of Requests from - collected links and additional inputs stored in other attributes of each remote_url_requests_inputs. 
- """ - created_requests: list[Request] = [] - - def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None: - """Callback to scrape response body with regexp and create Requests from matches.""" - matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8')) - created_requests.extend( - [ - Request.from_url( - match.group(0), - method=request_input.method, - payload=request_input.payload.encode('utf-8'), - headers=request_input.headers, - user_data=request_input.user_data, - ) - for match in matches - ] - ) - - remote_url_requests = [] - for remote_url_requests_input in remote_url_requests_inputs: - get_response_task = asyncio.create_task( - http_client.send_request( - method='GET', - url=remote_url_requests_input.requests_from_url, - ) - ) - - get_response_task.add_done_callback(partial(create_requests_from_response, remote_url_requests_input)) - remote_url_requests.append(get_response_task) - - await asyncio.gather(*remote_url_requests) - return created_requests diff --git a/tests/unit/actor/test_request_list.py b/tests/unit/actor/test_request_list.py index e3bbfb36..6c26ba64 100644 --- a/tests/unit/actor/test_request_list.py +++ b/tests/unit/actor/test_request_list.py @@ -11,7 +11,7 @@ from crawlee._types import HttpHeaders, HttpMethod from crawlee.http_clients import HttpResponse, HttpxHttpClient -from apify.storages.request_list import URL_NO_COMMAS_REGEX, RequestList +from apify.storages._request_list import URL_NO_COMMAS_REGEX, RequestList @pytest.mark.parametrize('request_method', get_args(HttpMethod)) @@ -35,7 +35,7 @@ async def test_request_list_open_request_types(request_method: HttpMethod, optio } request_dict_input = {**minimal_request_dict_input, **optional_input} - request_list = await RequestList.open(actor_start_urls_input=[request_dict_input]) + request_list = await RequestList.open(request_list_sources_input=[request_dict_input]) assert not await request_list.is_empty() request = await request_list.fetch_next_request() assert request is not None @@ -76,7 +76,7 @@ def read(self) -> bytes: async def test__request_list_open_from_url_correctly_send_requests() -> None: """Test that injected HttpClient's method send_request is called with properly passed arguments.""" - actor_start_urls_input: list[dict[str, Any]] = [ + request_list_sources_input: list[dict[str, Any]] = [ { 'requestsFromUrl': 'https://abc.dev/file.txt', 'method': 'GET', @@ -94,19 +94,20 @@ async def test__request_list_open_from_url_correctly_send_requests() -> None: }, ] - mocked_read_outputs = ('' for url in actor_start_urls_input) - http_client = HttpxHttpClient() + mocked_read_outputs = ('' for url in request_list_sources_input) + + mocked_http_client = mock.Mock(spec_set=HttpxHttpClient) with mock.patch.object( - http_client, 'send_request', return_value=_create_dummy_response(mocked_read_outputs) + mocked_http_client, 'send_request', return_value=_create_dummy_response(mocked_read_outputs) ) as mocked_send_request: - await RequestList.open(actor_start_urls_input=actor_start_urls_input, http_client=http_client) + await RequestList.open(request_list_sources_input=request_list_sources_input, http_client=mocked_http_client) expected_calls = [ call( method='GET', url=example_input['requestsFromUrl'], ) - for example_input in actor_start_urls_input + for example_input in request_list_sources_input ] mocked_send_request.assert_has_calls(expected_calls) @@ -124,7 +125,7 @@ async def test_request_list_open_from_url() -> None: ) ) - actor_start_urls_input = [ + 
request_list_sources_input = [ { 'requestsFromUrl': 'https://abc.dev/file.txt', 'method': 'GET', @@ -136,9 +137,11 @@ async def test_request_list_open_from_url() -> None: }, ] - http_client = HttpxHttpClient() - with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): - request_list = await RequestList.open(actor_start_urls_input=actor_start_urls_input, http_client=http_client) + mocked_http_client = mock.Mock(spec_set=HttpxHttpClient) + with mock.patch.object(mocked_http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): + request_list = await RequestList.open( + request_list_sources_input=request_list_sources_input, http_client=mocked_http_client + ) generated_requests = [] while request := await request_list.fetch_next_request(): generated_requests.append(request) @@ -159,9 +162,11 @@ async def test_request_list_open_from_url_additional_inputs() -> None: } response_bodies = iter((expected_simple_url,)) - http_client = HttpxHttpClient() - with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): - request_list = await RequestList.open(actor_start_urls_input=[example_start_url_input], http_client=http_client) + mocked_http_client = mock.Mock(spec_set=HttpxHttpClient) + with mock.patch.object(mocked_http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): + request_list = await RequestList.open( + request_list_sources_input=[example_start_url_input], http_client=mocked_http_client + ) request = await request_list.fetch_next_request() # Check all properties correctly created for request From 318c9c8353c9d0c0a433c075de350ddb3ef0be3c Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 20 Nov 2024 09:26:50 +0100 Subject: [PATCH 17/20] Addresing review comments 2 --- src/apify/storages/_request_list.py | 32 +++++--- tests/unit/actor/test_request_list.py | 108 ++++++++++---------------- 2 files changed, 62 insertions(+), 78 deletions(-) diff --git a/src/apify/storages/_request_list.py b/src/apify/storages/_request_list.py index c4ca1d9a..b3ce3f03 100644 --- a/src/apify/storages/_request_list.py +++ b/src/apify/storages/_request_list.py @@ -51,17 +51,25 @@ async def open( ) -> RequestList: """Creates RequestList from Actor input requestListSources. - name is name of the returned RequestList - request_list_sources_input can contain list dicts with either url or requestsFromUrl key - http_client is client that will be used to send get request to url defined in requestsFromUrl - - Example request_list_sources_input: - [ - # Gather urls from response body. - {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, - # Directly include this url. - {'url': 'https://crawlee.dev', 'method': 'GET'} - ] + Args: + name: Name of the returned RequestList. + request_list_sources_input: List of dicts with either url key or requestsFromUrl key. + http_client: Client that will be used to send get request to urls defined by value of requestsFromUrl keys. + + Returns: + RequestList created from request_list_sources_input. + + ### Usage + + ```python + example_input = [ + # Gather urls from response body. + {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'}, + # Directly include this url. 
+ {'url': 'https://crawlee.dev', 'method': 'GET'} + ] + request_list = await RequestList.open(request_list_sources_input=example_input) + ``` """ request_list_sources_input = request_list_sources_input or [] return await RequestList._create_request_list(name, request_list_sources_input, http_client) @@ -73,7 +81,7 @@ async def _create_request_list( if not http_client: http_client = HttpxHttpClient() - ulr_inputs = url_input_adapter.validate_python(request_list_sources_input) # instance of list[Union[...]] + ulr_inputs = url_input_adapter.validate_python(request_list_sources_input) simple_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _SimpleUrlInput] remote_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _RequestsFromUrlInput] diff --git a/tests/unit/actor/test_request_list.py b/tests/unit/actor/test_request_list.py index 6c26ba64..4f4c75ac 100644 --- a/tests/unit/actor/test_request_list.py +++ b/tests/unit/actor/test_request_list.py @@ -1,15 +1,15 @@ from __future__ import annotations import re -from typing import Any, Iterator, get_args -from unittest import mock -from unittest.mock import call +from dataclasses import dataclass +from typing import Any, get_args import pytest +import respx +from httpx import Response from crawlee._request import UserData -from crawlee._types import HttpHeaders, HttpMethod -from crawlee.http_clients import HttpResponse, HttpxHttpClient +from crawlee._types import HttpMethod from apify.storages._request_list import URL_NO_COMMAS_REGEX, RequestList @@ -52,30 +52,9 @@ async def test_request_list_open_request_types(request_method: HttpMethod, optio assert request.headers.root == optional_input.get('headers', {}) -def _create_dummy_response(read_output: Iterator[str]) -> HttpResponse: - """Create dummy_response that will iterate through read_output when called like dummy_response.read()""" - - class DummyResponse(HttpResponse): - @property - def http_version(self) -> str: - return '' - - @property - def status_code(self) -> int: - return 200 - - @property - def headers(self) -> HttpHeaders: - return HttpHeaders() - - def read(self) -> bytes: - return next(read_output).encode('utf-8') - - return DummyResponse() - - -async def test__request_list_open_from_url_correctly_send_requests() -> None: - """Test that injected HttpClient's method send_request is called with properly passed arguments.""" +@respx.mock +async def test_request_list_open_from_url_correctly_send_requests() -> None: + """Test that requests are sent to expected urls.""" request_list_sources_input: list[dict[str, Any]] = [ { 'requestsFromUrl': 'https://abc.dev/file.txt', @@ -94,65 +73,65 @@ async def test__request_list_open_from_url_correctly_send_requests() -> None: }, ] - mocked_read_outputs = ('' for url in request_list_sources_input) + routes = [respx.get(entry['requestsFromUrl']) for entry in request_list_sources_input] - mocked_http_client = mock.Mock(spec_set=HttpxHttpClient) - with mock.patch.object( - mocked_http_client, 'send_request', return_value=_create_dummy_response(mocked_read_outputs) - ) as mocked_send_request: - await RequestList.open(request_list_sources_input=request_list_sources_input, http_client=mocked_http_client) + await RequestList.open(request_list_sources_input=request_list_sources_input) - expected_calls = [ - call( - method='GET', - url=example_input['requestsFromUrl'], - ) - for example_input in request_list_sources_input - ] - mocked_send_request.assert_has_calls(expected_calls) + for route in routes: + assert 
route.called +@respx.mock async def test_request_list_open_from_url() -> None: """Test that create_request_list is correctly reading urls from remote url sources and also from simple input.""" expected_simple_url = 'https://www.someurl.com' expected_remote_urls_1 = {'http://www.something.com', 'https://www.somethingelse.com', 'http://www.bla.net'} expected_remote_urls_2 = {'http://www.ok.com', 'https://www.true-positive.com'} expected_urls = expected_remote_urls_1 | expected_remote_urls_2 | {expected_simple_url} - response_bodies = iter( - ( + + @dataclass + class MockedUrlInfo: + url: str + response_text: str + + mocked_urls = ( + MockedUrlInfo( + 'https://abc.dev/file.txt', 'blablabla{} more blablabla{} , even more blablabla. {} '.format(*expected_remote_urls_1), - 'some stuff{} more stuff{} www.falsepositive www.false_positive.com'.format(*expected_remote_urls_2), - ) + ), + MockedUrlInfo( + 'https://www.abc.dev/file2', + 'some stuff{} more stuff{} www.false_positive.com'.format(*expected_remote_urls_2), + ), ) request_list_sources_input = [ { - 'requestsFromUrl': 'https://abc.dev/file.txt', + 'requestsFromUrl': mocked_urls[0].url, 'method': 'GET', }, {'url': expected_simple_url, 'method': 'GET'}, { - 'requestsFromUrl': 'https://www.abc.dev/file2', + 'requestsFromUrl': mocked_urls[1].url, 'method': 'GET', }, ] + for mocked_url in mocked_urls: + respx.get(mocked_url.url).mock(return_value=Response(200, text=mocked_url.response_text)) - mocked_http_client = mock.Mock(spec_set=HttpxHttpClient) - with mock.patch.object(mocked_http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): - request_list = await RequestList.open( - request_list_sources_input=request_list_sources_input, http_client=mocked_http_client - ) - generated_requests = [] - while request := await request_list.fetch_next_request(): - generated_requests.append(request) + request_list = await RequestList.open(request_list_sources_input=request_list_sources_input) + generated_requests = [] + while request := await request_list.fetch_next_request(): + generated_requests.append(request) # Check correctly created requests' urls in request list assert {generated_request.url for generated_request in generated_requests} == expected_urls +@respx.mock async def test_request_list_open_from_url_additional_inputs() -> None: """Test that all generated request properties are correctly populated from input values.""" - expected_simple_url = 'https://www.someurl.com' + expected_url = 'https://www.someurl.com' example_start_url_input: dict[str, Any] = { 'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'POST', @@ -161,17 +140,14 @@ async def test_request_list_open_from_url_additional_inputs() -> None: 'userData': {'another_key': 'another_value'}, } - response_bodies = iter((expected_simple_url,)) - mocked_http_client = mock.Mock(spec_set=HttpxHttpClient) - with mock.patch.object(mocked_http_client, 'send_request', return_value=_create_dummy_response(response_bodies)): - request_list = await RequestList.open( - request_list_sources_input=[example_start_url_input], http_client=mocked_http_client - ) - request = await request_list.fetch_next_request() + respx.get(example_start_url_input['requestsFromUrl']).mock(return_value=Response(200, text=expected_url)) + + request_list = await RequestList.open(request_list_sources_input=[example_start_url_input]) + request = await request_list.fetch_next_request() # Check all properties correctly created for request assert request - assert request.url == 
expected_simple_url + assert request.url == expected_url assert request.method == example_start_url_input['method'] assert request.headers.root == example_start_url_input['headers'] assert request.payload == str(example_start_url_input['payload']).encode('utf-8') From feff6badb5badccb01170fa2f8c62f1480158291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josef=20Proch=C3=A1zka?= Date: Wed, 20 Nov 2024 15:18:10 +0100 Subject: [PATCH 18/20] Update src/apify/storages/_request_list.py Co-authored-by: Jan Buchar --- src/apify/storages/_request_list.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/apify/storages/_request_list.py b/src/apify/storages/_request_list.py index b3ce3f03..3d21871c 100644 --- a/src/apify/storages/_request_list.py +++ b/src/apify/storages/_request_list.py @@ -81,10 +81,10 @@ async def _create_request_list( if not http_client: http_client = HttpxHttpClient() - ulr_inputs = url_input_adapter.validate_python(request_list_sources_input) + url_inputs = url_input_adapter.validate_python(request_list_sources_input) - simple_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _SimpleUrlInput] - remote_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _RequestsFromUrlInput] + simple_url_inputs = [url_input for url_input in url_inputs if isinstance(url_input, _SimpleUrlInput)] + remote_url_inputs = [url_input for url_input in url_inputs if isinstance(url_input, _RequestsFromUrlInput)] simple_url_requests = RequestList._create_requests_from_input(simple_url_inputs) remote_url_requests = await RequestList._fetch_requests_from_url(remote_url_inputs, http_client=http_client) From b150e1dec152791d373de8a27f7d050e1d96381a Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 20 Nov 2024 15:30:40 +0100 Subject: [PATCH 19/20] Use docs_group decorator --- src/apify/storages/_request_list.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/apify/storages/_request_list.py b/src/apify/storages/_request_list.py index 3d21871c..0ad521c6 100644 --- a/src/apify/storages/_request_list.py +++ b/src/apify/storages/_request_list.py @@ -13,6 +13,8 @@ from crawlee.http_clients import BaseHttpClient, HttpxHttpClient from crawlee.storages import RequestList as CrawleeRequestList +from apify._utils import docs_group + URL_NO_COMMAS_REGEX = re.compile( r'https?:\/\/(www\.)?([^\W_]|[^\W_][-\w0-9@:%._+~#=]{0,254}[^\W_])\.[a-z]{2,63}(:\d{1,5})?(\/[-\w@:%+.~#?&/=()]*)?' ) @@ -36,7 +38,7 @@ class _SimpleUrlInput(_RequestDetails): url_input_adapter = TypeAdapter(list[Union[_RequestsFromUrlInput, _SimpleUrlInput]]) -# @docs_group('Classes') # Not yet available in crawlee +@docs_group('Classes') # Not yet available in crawlee class RequestList(CrawleeRequestList): """Extends crawlee RequestList. 
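For orientation, the `requestsFromUrl` code path exercised by the tests above reduces to fetching a plain-text body and scanning it with `URL_NO_COMMAS_REGEX`. A minimal, self-contained sketch of that extraction step follows; the regex literal is copied verbatim from `src/apify/storages/_request_list.py` as shown in the hunk above, while the sample body and the expected output in the final comment are illustrative assumptions rather than anything taken from the patches.

```python
import re

# Copied verbatim from src/apify/storages/_request_list.py (see the hunk above).
URL_NO_COMMAS_REGEX = re.compile(
    r'https?:\/\/(www\.)?([^\W_]|[^\W_][-\w0-9@:%._+~#=]{0,254}[^\W_])\.[a-z]{2,63}(:\d{1,5})?(\/[-\w@:%+.~#?&/=()]*)?'
)

# Hypothetical body of a file referenced by a 'requestsFromUrl' source.
body = 'See https://crawlee.dev, https://www.example.com/path?x=1 and www.no-scheme.com'

# Same extraction the RequestList code performs on each fetched response body:
# every regex match becomes the URL of a new Request.
urls = [match.group(0) for match in re.finditer(URL_NO_COMMAS_REGEX, body)]
print(urls)  # ['https://crawlee.dev', 'https://www.example.com/path?x=1']
```

Because a comma cannot appear inside a match, comma-separated URL lists split cleanly, and bare domains without a scheme (such as `www.no-scheme.com`) are ignored, which is exactly what the false-positive strings in `test_request_list_open_from_url` assert.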
From 470041f3bc1c76c0ef6e2ebe46a3ff1b41dfb991 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Wed, 20 Nov 2024 16:44:31 +0100 Subject: [PATCH 20/20] Update src/apify/storages/_request_list.py --- src/apify/storages/_request_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/apify/storages/_request_list.py b/src/apify/storages/_request_list.py index 0ad521c6..2dd381fa 100644 --- a/src/apify/storages/_request_list.py +++ b/src/apify/storages/_request_list.py @@ -38,7 +38,7 @@ class _SimpleUrlInput(_RequestDetails): url_input_adapter = TypeAdapter(list[Union[_RequestsFromUrlInput, _SimpleUrlInput]]) -@docs_group('Classes') # Not yet available in crawlee +@docs_group('Classes') class RequestList(CrawleeRequestList): """Extends crawlee RequestList.
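End-to-end, the series leaves `RequestList.open` as the single entry point for turning request list input into a ready-to-use request source. The sketch below shows how an Actor might call it; it is an illustration under stated assumptions, not code from the patches. It assumes the module path used by the unit tests (`apify.storages._request_list`; a public re-export is not shown here), an Actor input field named `requestListSources` (mirroring the docstring wording), and an illustrative storage name.

```python
import asyncio

from apify import Actor
from apify.storages._request_list import RequestList  # import path as used by the unit tests


async def main() -> None:
    async with Actor:
        # 'requestListSources' is an assumed input field name; align it with the Actor's input schema.
        actor_input = await Actor.get_input() or {}

        request_list = await RequestList.open(
            name='my-request-list',  # illustrative name for the returned RequestList
            request_list_sources_input=actor_input.get('requestListSources', []),
        )

        # Drain the list the same way the unit tests do: plain 'url' sources are included
        # directly, while 'requestsFromUrl' sources are fetched and scanned for links first.
        while request := await request_list.fetch_next_request():
            Actor.log.info(f'Prepared request: {request.method} {request.url}')


if __name__ == '__main__':
    asyncio.run(main())
```

Passing a custom `http_client` (for example a preconfigured `crawlee.http_clients.HttpxHttpClient`) remains possible through the keyword argument of the same name; when it is omitted, the code falls back to a default `HttpxHttpClient`, as the `_create_request_list` hunk shows.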