@@ -117,8 +117,10 @@ async def test_request_queue_deduplication(
117
117
make_actor : MakeActorFunction ,
118
118
run_actor : RunActorFunction ,
119
119
) -> None :
120
- """Test that the deduplication works correctly. Try to add 2 same requests, but it should call API just once.
120
+ """Test that the deduplication works correctly. Try to add 2 similar requests, but it should call API just once.
121
121
122
+ Deduplication works based on the request's `unique_key` only. To include more attributes in the unique key the
123
+ `use_extended_unique_key=True` argument of `Request.from_url` method can be used.
122
124
This tests internal optimization that changes no behavior for the user.
123
125
The functions' input/output behave the same way; it only uses fewer API calls.
124
126
"""
@@ -129,7 +131,8 @@ async def main() -> None:
129
131
from apify import Actor , Request
130
132
131
133
async with Actor :
132
- request = Request .from_url ('http://example.com' )
134
+ request1 = Request .from_url ('http://example.com' , method = 'POST' )
135
+ request2 = Request .from_url ('http://example.com' , method = 'GET' )
133
136
rq = await Actor .open_request_queue ()
134
137
135
138
await asyncio .sleep (10 ) # Wait to be sure that metadata are updated
@@ -142,8 +145,8 @@ async def main() -> None:
142
145
Actor .log .info (stats_before )
143
146
144
147
# Add two similar requests (same URL, different method) that share the same `unique_key`
145
- await rq .add_request (request )
146
- await rq .add_request (request )
148
+ await rq .add_request (request1 )
149
+ await rq .add_request (request2 )
147
150
148
151
await asyncio .sleep (10 ) # Wait to be sure that metadata are updated
149
152
_rq = await rq_client .get ()
@@ -159,6 +162,55 @@ async def main() -> None:
159
162
assert run_result .status == 'SUCCEEDED'
160
163
161
164
165
async def test_request_queue_deduplication_use_extended_unique_key(
    make_actor: MakeActorFunction,
    run_actor: RunActorFunction,
) -> None:
    """Check that requests built with an extended unique key are each written to the request queue.

    Two requests for the same URL that differ only in their HTTP method are created with
    `use_extended_unique_key=True` and added to the request queue. The queue's `writeCount` statistic is read
    through the raw API client before and after adding them, and it is expected to grow by exactly 2.
    """

    async def main() -> None:
        # Imports are kept local to `main`, like in the other Actor bodies in this file
        # (presumably because `main` is shipped as the Actor source - confirm in `make_actor`).
        import asyncio

        from apify import Actor, Request

        async with Actor:
            post_request = Request.from_url('http://example.com', method='POST', use_extended_unique_key=True)
            get_request = Request.from_url('http://example.com', method='GET', use_extended_unique_key=True)
            queue = await Actor.open_request_queue()

            await asyncio.sleep(10)  # Wait to be sure that metadata are updated

            # Stats are not exposed in the `RequestQueue` class, so the raw client is used to read them.
            raw_client = Actor.apify_client.request_queue(request_queue_id=queue.id)

            async def snapshot_stats() -> dict:
                """Fetch the queue info, log its `stats` section and return it."""
                queue_info = await raw_client.get()
                assert queue_info
                stats = queue_info.get('stats', {})
                Actor.log.info(stats)
                return stats

            stats_at_start = await snapshot_stats()

            # Add two similar requests: same URL, different method, extended unique key enabled.
            for request in (post_request, get_request):
                await queue.add_request(request)

            await asyncio.sleep(10)  # Wait to be sure that metadata are updated
            stats_at_end = await snapshot_stats()

            # One write per request is expected.
            new_writes = stats_at_end['writeCount'] - stats_at_start['writeCount']
            assert new_writes == 2

    actor = await make_actor(label='rq-deduplication', main_func=main)
    finished_run = await run_actor(actor)

    assert finished_run.status == 'SUCCEEDED'
212
+
213
+
162
214
async def test_request_queue_parallel_deduplication (
163
215
make_actor : MakeActorFunction ,
164
216
run_actor : RunActorFunction ,
0 commit comments