@@ -39,7 +39,7 @@ def parse_comment(response: scrapy.http.response.html.HtmlResponse, topic_id):
3939 # if not self.db.exist(CommentItem, comment_id):
4040 cbox = reply_row .css ("tr" )
4141 author_name = cbox .css (".dark::text" ).get ("-1" )
42- reply_content = cbox .css ( ".reply_content" ). xpath ("string(.)" ).get ("" )
42+ reply_content = cbox .xpath ('.//div[@class="reply_content"]' ).get ("" )
4343 reply_time = cbox .css (".ago::attr(title)" ).get ("" )
4444 thank_count = cbox .css (".fade::text" ).get ("0" ).strip ()
4545 yield CommentItem (
@@ -62,14 +62,16 @@ def parse_topic(response: scrapy.http.response.html.HtmlResponse, topic_id):
6262 topic_click_count = response .css (".header > small::text" ).re_first (r"\d+" , "-1" )
6363 topic_tags = response .css (".tag::attr(href)" ).re (r"/tag/(.*)" )
6464 topic_vote = response .xpath ('(//a[@class="vote"])[1]/text()' ).re_first (r"\d+" , "0" )
65- # need login, some topics may not have this
65+ # need login; some topics may not have the .topic_stats block
6666 topic_favorite_count = - 1
6767 topic_thank_count = - 1
68- topic_stats = response .css (".topic_stats::text" ).re (r"\d+" )
69- if len (topic_stats ) == 3 :
70- # topic_click_count = topic_stats[0]
71- topic_favorite_count = topic_stats [1 ]
72- topic_thank_count = topic_stats [2 ]
68+ if response .css (".topic_stats::text" ).get () is not None :
69+ topic_favorite_count = response .css (".topic_stats::text" ).re_first (
70+ r"(\d+) 人收藏" , "0"
71+ )
72+ topic_thank_count = response .css (".topic_stats::text" ).re_first (
73+ r"(\d+) 人感谢" , "0"
74+ )
7375
7476 topic_content = response .css (".cell .topic_content" ).get ("" )
7577
0 commit comments