Skip to content

Commit 0201dba

Browse files
committed
add 403 logging
1 parent 3b13aae commit 0201dba

File tree

2 files changed

+7
-3
lines changed

2 files changed

+7
-3
lines changed

v2ex_scrapy/middlewares.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,17 @@
55

66
# useful for handling different item types with a single interface
77

8+
import logging
89
import random
910
import time
1011

1112
import scrapy
1213
import scrapy.http.response.html
1314
from scrapy import signals
1415
from scrapy.exceptions import IgnoreRequest
15-
from v2ex_scrapy.DB import DB, LogItem
16+
1617
from v2ex_scrapy import utils
18+
from v2ex_scrapy.DB import DB, LogItem
1719

1820

1921
class TutorialScrapySpiderMiddleware:
@@ -70,6 +72,7 @@ class ProxyAndCookieDownloaderMiddleware:
7072
def __init__(self):
7173
self.proxies: list[str] = []
7274
self.cookies: dict[str, str] = {}
75+
self.logger = logging.getLogger(__name__)
7376

7477
@classmethod
7578
def from_crawler(cls, crawler):
@@ -102,6 +105,7 @@ def process_response(
102105
):
103106
# Called with the response returned from the downloader.
104107
if response.status == 403:
108+
self.logger.info(f"skip url:{response.url}, because 403")
105109
raise IgnoreRequest(f"403 url {response.url}")
106110
# Must either;
107111
# - return a Response object
@@ -123,7 +127,7 @@ def spider_opened(self, spider: scrapy.Spider):
123127
self.proxies = spider.settings.get("PROXIES", []) # type: ignore
124128

125129
cookie_str = spider.settings.get("COOKIES", "")
126-
self.cookies = utils.cookie_str2cookie_dict(cookie_str) # type: ignore
130+
self.cookies = utils.cookie_str2cookie_dict(cookie_str) # type: ignore
127131

128132
spider.logger.info("Spider opened: %s" % spider.name)
129133

v2ex_scrapy/pipelines.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def process_item(
4343

4444
def process_it(self, items: list[ItemsType]):
4545
if len(items) > 0 and isinstance(items[0], MemberItem):
46-
self.process_members(items)
46+
self.process_members(items) # type: ignore
4747
else:
4848
self.db.session.add_all(items)
4949
self.db.session.commit()

0 commit comments

Comments
 (0)