
Commit 0623ade

Properly pass request creation settings.
TODO: Finish test for it. WIP
1 parent f4b3fc5 commit 0623ade

2 files changed: +25 -11 lines changed


src/apify/_actor_inputs.py

Lines changed: 20 additions & 8 deletions
@@ -5,6 +5,7 @@
 from asyncio import Task
 from typing import Any
 
+from functools import partial
 from pydantic import BaseModel, Field
 
 from crawlee import Request
@@ -67,23 +68,34 @@ def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> lis
 async def _create_requests_from_url(
     remote_url_requests_inputs: list[_RequestsFromUrlInput], http_client: BaseHttpClient
 ) -> list[Request]:
+    """Create list of requests from url.
+
+    Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Run extracting
+    callback on each response body and use URL_NO_COMMAS_REGEX regexp to find all links. Create list of Requests from
+    collected links and additional inputs stored in other attributes of each remote_url_requests_inputs.
+    """
     created_requests: list[Request] = []
 
-    def extract_requests_from_response(task: Task) -> list[Request]:
+    def create_requests_from_response(request_input: _SimpleUrlInput, task: Task) -> list[Request]:
+        """Callback to scrape response body with regexp and create Requests from matches."""
         matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8'))
-        created_requests.extend([Request.from_url(match.group(0)) for match in matches])
+        created_requests.extend([Request.from_url(
+            match.group(0),
+            method=request_input.method,
+            payload=request_input.payload.encode('utf-8'),
+            headers=request_input.headers,
+            user_data=request_input.user_data) for match in matches])
 
     remote_url_requests = []
-    for request_input in remote_url_requests_inputs:
+    for remote_url_requests_input in remote_url_requests_inputs:
         task = asyncio.create_task(
             http_client.send_request(
-                method=request_input.method,
-                url=request_input.requests_from_url,
-                headers=request_input.headers,
-                payload=request_input.payload.encode('utf-8'),
+                method='GET',
+                url=remote_url_requests_input.requests_from_url,
             )
         )
-        task.add_done_callback(extract_requests_from_response)
+
+        task.add_done_callback(partial(create_requests_from_response, remote_url_requests_input))
         remote_url_requests.append(task)
 
     await asyncio.gather(*remote_url_requests)
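
The core of the change is that Task.add_done_callback() passes only the finished task to its callback, so the per-input request settings have to be bound in beforehand with functools.partial. Below is a minimal, standalone sketch of that pattern; the names (fetch, on_done, settings) are illustrative and not part of the Apify SDK.

import asyncio
from functools import partial


async def fetch(url: str) -> str:
    # Stand-in for http_client.send_request(); returns a fake response body.
    return f'body of {url}'


def on_done(settings: dict, task: asyncio.Task) -> None:
    # `settings` was bound ahead of time via partial; asyncio supplies `task`.
    print(settings['method'], task.result())


async def main() -> None:
    settings = {'method': 'POST'}
    task = asyncio.create_task(fetch('https://example.com'))
    task.add_done_callback(partial(on_done, settings))
    await task


asyncio.run(main())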

tests/unit/actor/test_actor_create_request_list.py

Lines changed: 5 additions & 3 deletions
@@ -96,10 +96,8 @@ async def test_actor_create_request_list_from_url_correctly_send_requests() -> N
 
     expected_calls = [
         call(
-            method=example_input['method'],
+            method='GET',
             url=example_input['requests_from_url'],
-            headers=example_input.get('headers', {}),
-            payload=example_input.get('payload', '').encode('utf-8'),
         )
         for example_input in example_start_urls_input
     ]
@@ -137,3 +135,7 @@ async def test_actor_create_request_list_from_url() -> None:
 
     # Check correctly created requests' urls in request list
     assert {generated_request.url for generated_request in generated_requests} == expected_urls
+
+async def test_actor_create_request_list_from_url_additional_inputs() -> None:
+    assert False
+    # TODO: test that additional properties, like payload, headers, and request type, are all properly passed.
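
For reference, a hypothetical sketch (not part of this commit) of what the TODO test could assert, based only on the keyword arguments the new code passes to Request.from_url; the attribute names and equality semantics on the crawlee Request model are assumptions.

from crawlee import Request


def test_request_settings_are_passed_through() -> None:
    # Build a Request the same way _create_requests_from_url now does,
    # then check that the extra settings survive creation.
    request = Request.from_url(
        'https://example.com/start',
        method='POST',
        payload='some payload'.encode('utf-8'),
        headers={'Authorization': 'Bearer token'},
        user_data={'label': 'detail'},
    )

    assert request.method == 'POST'
    assert request.payload == b'some payload'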
