Skip to content

Commit a1da132

Browse files
committed
improvement of forefront feature
1 parent 5f890be commit a1da132

File tree

1 file changed

+13
-11
lines changed

1 file changed

+13
-11
lines changed

src/crawlee/storage_clients/_file_system/_request_queue_client.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,9 @@ def __init__(
9292
self._in_progress = set[str]()
9393
"""A set of request IDs that are currently being processed."""
9494

95-
self._forefront_requests = set[str]()
96-
"""A set of request IDs that should be prioritized (added with forefront=True)."""
95+
self._forefront_requests = list[str]()
96+
"""A list of request IDs that should be prioritized (added with forefront=True).
97+
Most recent forefront requests are added at the beginning of the list."""
9798

9899
@override
99100
@property
@@ -320,7 +321,7 @@ async def add_batch_of_requests(
320321

321322
# If forefront and existing request is not handled, mark it as forefront
322323
if forefront and was_already_present and not was_already_handled and existing_request:
323-
self._forefront_requests.add(existing_request.id)
324+
self._forefront_requests.insert(0, existing_request.id)
324325
processed_requests.append(
325326
ProcessedRequest(
326327
id=existing_request.id,
@@ -363,9 +364,9 @@ async def add_batch_of_requests(
363364
new_total_request_count += 1
364365
new_pending_request_count += 1
365366

366-
# If forefront, add to the forefront set
367+
# If forefront, add to the forefront list
367368
if forefront:
368-
self._forefront_requests.add(request.id)
369+
self._forefront_requests.insert(0, request.id)
369370

370371
processed_requests.append(
371372
ProcessedRequest(
@@ -487,8 +488,9 @@ async def fetch_next_request(self) -> Request | None:
487488
# Mark as in-progress in memory
488489
self._in_progress.add(request.id)
489490

490-
# Remove from forefront set if it was there
491-
self._forefront_requests.discard(request.id)
491+
# Remove from forefront list if it was there
492+
if request.id in self._forefront_requests:
493+
self._forefront_requests.remove(request.id)
492494

493495
# Update accessed timestamp
494496
await self._update_metadata(update_accessed_at=True)
@@ -578,10 +580,10 @@ async def reclaim_request(
578580

579581
# If forefront is true, mark this request as priority
580582
if forefront:
581-
self._forefront_requests.add(request.id)
582-
else:
583-
# Make sure it's not in the forefront set if it was previously added there
584-
self._forefront_requests.discard(request.id)
583+
self._forefront_requests.insert(0, request.id)
584+
# Make sure it's not in the forefront list if it was previously added there
585+
elif request.id in self._forefront_requests:
586+
self._forefront_requests.remove(request.id)
585587

586588
# To simulate changing the file timestamp for FIFO ordering,
587589
# we'll update the file with current timestamp

0 commit comments

Comments
 (0)