Skip to content

Commit 65e455d

Browse files
committed
fix reply_content, topic_favorite_count and topic_thank_count
1 parent 03a883c commit 65e455d

File tree

1 file changed

+9
-7
lines changed

1 file changed

+9
-7
lines changed

v2ex_scrapy/v2ex_parser.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def parse_comment(response: scrapy.http.response.html.HtmlResponse, topic_id):
 # if not self.db.exist(CommentItem, comment_id):
 cbox = reply_row.css("tr")
 author_name = cbox.css(".dark::text").get("-1")
-reply_content = cbox.css(".reply_content").xpath("string(.)").get("")
+reply_content = cbox.xpath('.//div[@class="reply_content"]').get("")
 reply_time = cbox.css(".ago::attr(title)").get("")
 thank_count = cbox.css(".fade::text").get("0").strip()
 yield CommentItem(
@@ -62,14 +62,16 @@ def parse_topic(response: scrapy.http.response.html.HtmlResponse, topic_id):
 topic_click_count = response.css(".header > small::text").re_first(r"\d+", "-1")
 topic_tags = response.css(".tag::attr(href)").re(r"/tag/(.*)")
 topic_vote = response.xpath('(//a[@class="vote"])[1]/text()').re_first(r"\d+", "0")
-# need login, some topics may not have this
+# need login, some topics may not have
 topic_favorite_count = -1
 topic_thank_count = -1
-topic_stats = response.css(".topic_stats::text").re(r"\d+")
-if len(topic_stats) == 3:
-# topic_click_count = topic_stats[0]
-topic_favorite_count = topic_stats[1]
-topic_thank_count = topic_stats[2]
+if response.css(".topic_stats::text").get() is not None:
+topic_favorite_count = response.css(".topic_stats::text").re_first(
+r"(\d+) 人收藏", "0"
+)
+topic_thank_count = response.css(".topic_stats::text").re_first(
+r"(\d+) 人感谢", "0"
+)
 
 topic_content = response.css(".cell .topic_content").get("")
 
0 commit comments

Comments
 (0)