Skip to content

Commit f7b0dfb

Browse files
committed
up tests
1 parent 9f8d7b4 commit f7b0dfb

File tree

2 files changed

+7
-4
lines changed

2 files changed

+7
-4
lines changed

src/crawlee/request_loaders/_sitemap_request_loader.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,10 @@ def __init__(
8080
exclude: List of glob or regex patterns to exclude URLs.
8181
max_buffer_size: Maximum number of URLs to buffer in memory.
8282
http_client: the instance of `HttpClient` to use for fetching sitemaps.
83-
persist_state_key: Key in key-value store for persisting.
83+
persist_state_key: Key in key-value store for persisting state. Each SitemapRequestLoader
84+
instance must use a unique key to avoid conflicts when multiple loaders run
85+
concurrently. Sharing the same key between instances will cause state corruption
86+
and unpredictable behavior.
84 87
persist_enabled: Flag to enable/disable persistence.
85 88
"""
86 89
self._http_client = http_client

tests/unit/request_loaders/test_sitemap_request_loader.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ async def test_abort_sitemap_loading(server_url: URL, http_client: HttpClient) -
114114

115115
async def test_create_persist_state_for_sitemap_loading(server_url: URL, http_client: HttpClient) -> None:
116116
sitemap_url = (server_url / 'sitemap.xml').with_query(base64=encode_base64(BASIC_SITEMAP.encode()))
117-
persist_key = f'test_sitemap_loader_{id(http_client)}'
117+
persist_key = f'create_persist_state_{id(http_client)}'
118118
sitemap_loader = SitemapRequestLoader(
119119
[str(sitemap_url)], http_client=http_client, persist_state_key=persist_key, persist_enabled=True
120120
)
@@ -132,7 +132,7 @@ async def test_create_persist_state_for_sitemap_loading(server_url: URL, http_cl
132132

133133
async def test_data_persistence_for_sitemap_loading(server_url: URL, http_client: HttpClient) -> None:
134134
sitemap_url = (server_url / 'sitemap.xml').with_query(base64=encode_base64(BASIC_SITEMAP.encode()))
135-
persist_key = f'test_sitemap_loader_{id(http_client)}'
135+
persist_key = f'data_persist_state_{id(http_client)}'
136136
sitemap_loader = SitemapRequestLoader(
137137
[str(sitemap_url)], http_client=http_client, persist_state_key=persist_key, persist_enabled=True
138138
)
@@ -155,7 +155,7 @@ async def test_recovery_data_persistence_for_sitemap_loading(
155155
http_client: HttpClient,
156156
) -> None:
157157
sitemap_url = (server_url / 'sitemap.xml').with_query(base64=encode_base64(BASIC_SITEMAP.encode()))
158-
persist_key = f'test_sitemap_loader_{id(http_client)}'
158+
persist_key = f'recovery_persist_state_{id(http_client)}'
159159
sitemap_loader = SitemapRequestLoader(
160160
[str(sitemap_url)], http_client=http_client, persist_state_key=persist_key, persist_enabled=True
161161
)

0 commit comments

Comments
 (0)