Skip to content

Commit dffc968

Browse files
authored
Add process_spider_output_async() to the spider middleware. (#91)
* Add process_spider_output_async() to the spider middleware.
* Remove an extra empty line.
1 parent 970b45c commit dffc968

File tree

1 file changed

+14
-3
lines changed

1 file changed

+14
-3
lines changed

sh_scrapy/middlewares.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22
import itertools
3+
from warnings import warn
34
from weakref import WeakKeyDictionary
45

56
from scrapy import Request
@@ -28,11 +29,21 @@ def process_spider_output(self, response, result, spider):
2829
parent = self._seen_requests.pop(response.request, None)
2930
for x in result:
3031
if isinstance(x, Request):
31-
x.meta[HS_PARENT_ID_KEY] = parent
32-
# Remove request id if it was for some reason set in the request coming from Spider.
33-
x.meta.pop(HS_REQUEST_ID_KEY, None)
32+
self._process_request(x, parent)
3433
yield x
3534

35+
async def process_spider_output_async(self, response, result, spider):
36+
parent = self._seen_requests.pop(response.request, None)
37+
async for x in result:
38+
if isinstance(x, Request):
39+
self._process_request(x, parent)
40+
yield x
41+
42+
def _process_request(self, request, parent):
43+
request.meta[HS_PARENT_ID_KEY] = parent
44+
# Remove request id if it was for some reason set in the request coming from Spider.
45+
request.meta.pop(HS_REQUEST_ID_KEY, None)
46+
3647

3748
class HubstorageDownloaderMiddleware:
3849
"""Hubstorage downloader middleware.

0 commit comments

Comments
 (0)