Skip to content

Commit 6f6394a

Browse files
committed
fix
1 parent eb1c0ac commit 6f6394a

File tree

1 file changed: +32 -30 lines changed

1 file changed: +32 -30 lines changed

src/apify_client/clients/resource_clients/request_queue.py

Lines changed: 32 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import math
66
from dataclasses import dataclass
77
from datetime import timedelta
8+
from multiprocessing import process
89
from typing import Any, TypedDict
910

1011
from apify_shared.utils import filter_out_none_values_recursively, ignore_docs, parse_date_fields
@@ -586,10 +587,13 @@ async def _batch_add_requests_worker(
586587
processed_requests = list[dict]()
587588
unprocessed_requests = list[dict]()
588589

589-
try:
590-
while True:
590+
while True:
591+
try:
591592
batch = await queue.get()
593+
except asyncio.CancelledError:
594+
break
592595

596+
try:
593597
response = await self.http_client.call(
594598
url=self._url('requests/batch'),
595599
method='POST',
@@ -599,29 +603,23 @@ async def _batch_add_requests_worker(
599603

600604
response_parsed = parse_date_fields(pluck_data(response.json()))
601605

602-
# If the request was successful, add it to the processed requests.
603-
if 200 <= response.status_code <= 299:
604-
processed_requests.append(response_parsed)
605-
606606
# If the request was not successful and the number of retries is less than the maximum,
607-
# retry the request.
608-
elif batch.num_of_retries < max_unprocessed_requests_retries:
607+
# put the batch back into the queue and retry the request later.
608+
if (not response.is_success) and batch.num_of_retries < max_unprocessed_requests_retries:
609609
batch.num_of_retries += 1
610610
await asyncio.sleep(min_delay_between_unprocessed_requests_retries.total_seconds())
611611
await queue.put(batch)
612612

613-
# Otherwise, add the request to the unprocessed requests.
613+
# Otherwise, extract the processed and unprocessed requests from the response.
614614
else:
615-
unprocessed_requests.append(response_parsed)
615+
processed_requests.extend(response_parsed.get('processedRequests', []))
616+
unprocessed_requests.extend(response_parsed.get('unprocessedRequests', []))
616617

617-
except asyncio.CancelledError:
618-
logger.debug('Worker task was cancelled.')
618+
except Exception as exc:
619+
logger.warning(f'Error occurred while processing a batch of requests: {exc}')
619620

620-
except Exception as exc:
621-
logger.warning('Worker task failed with an exception.', exc_info=exc)
622-
623-
finally:
624-
queue.task_done()
621+
finally:
622+
queue.task_done()
625623

626624
return {
627625
'processed_requests': processed_requests,
@@ -670,31 +668,35 @@ async def batch_add_requests(
670668

671669
# Start the worker tasks.
672670
for i in range(max_parallel):
673-
task = asyncio.create_task(
674-
self._batch_add_requests_worker(
675-
queue,
676-
request_params,
677-
max_unprocessed_requests_retries,
678-
min_delay_between_unprocessed_requests_retries,
679-
),
680-
name=f'batch_add_requests_worker_{i}',
671+
coro = self._batch_add_requests_worker(
672+
queue,
673+
request_params,
674+
max_unprocessed_requests_retries,
675+
min_delay_between_unprocessed_requests_retries,
681676
)
677+
task = asyncio.create_task(coro, name=f'batch_add_requests_worker_{i}')
682678
tasks.add(task)
683679

684680
# Wait for all batches to be processed.
685681
await queue.join()
686682

687-
# Send cancel signals to all worker tasks.
683+
# Send cancel signals to all worker tasks and wait for them to finish.
688684
for task in tasks:
689685
task.cancel()
690686

691-
# Wait for all worker tasks to finish.
692687
results: list[BatchAddRequestsResult] = await asyncio.gather(*tasks)
693688

694-
# Combine the results from all worker tasks.
689+
# Combine the results from all worker tasks and return them.
690+
processed_requests = []
691+
unprocessed_requests = []
692+
693+
for result in results:
694+
processed_requests.extend(result['processed_requests'])
695+
unprocessed_requests.extend(result['unprocessed_requests'])
696+
695697
return {
696-
'processed_requests': [req for result in results for req in result['processed_requests']],
697-
'unprocessed_requests': [req for result in results for req in result['unprocessed_requests']],
698+
'processed_requests': processed_requests,
699+
'unprocessed_requests': unprocessed_requests,
698700
}
699701

700702
async def batch_delete_requests(self: RequestQueueClientAsync, requests: list[dict]) -> dict:

0 commit comments

Comments
 (0)