@@ -33,7 +33,7 @@ def __init__(self, categlory):
3333 self .startUrl = "https://search.jd.com/Search?keyword=%s&enc=utf-8" % (
3434 quote (categlory )
3535 )
36- self .commentBaseUrl = "https://sclub. jd.com/comment/productPageComments.action ?"
36+ self .commentBaseUrl = "https://api.m. jd.com/?"
3737 self .headers = {
3838 "accept" : "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,"
3939 "*/*;q=0.8,application/signed-exchange;v=b3;q=0.9" ,
@@ -74,15 +74,20 @@ def __init__(self, categlory):
7474 self .iplist = {"http" : [], "https" : []}
7575
7676 def getParamUrl (self , productid : str , page : str , score : str ):
77- params = { # 用于控制页数,页面信息数的数据,非常重要,必不可少,要不然会被JD识别出来,爬不出相应的数据。
78- "productId" : "%s" % productid ,
79- "score" : "%s" % score , # 1表示差评,2表示中评,3表示好评
80- "sortType" : "5" ,
81- "page" : "%s" % page ,
82- "pageSize" : "10" ,
83- "isShadowSku" : "0" ,
84- "rid" : "0" ,
85- "fold" : "1" ,
77+ params = {
78+ "appid" : "item-v3" ,
79+ "functionId" : "pc_club_productPageComments" ,
80+ "client" : "pc" ,
81+ "body" : { # 用于控制页数,页面信息数的数据,非常重要,必不可少,要不然会被JD识别出来,爬不出相应的数据。
82+ "productId" : "%s" % productid ,
83+ "score" : "%s" % score , # 1表示差评,2表示中评,3表示好评
84+ "sortType" : "5" ,
85+ "page" : "%s" % page ,
86+ "pageSize" : "10" ,
87+ "isShadowSku" : "0" ,
88+ "rid" : "0" ,
89+ "fold" : "1" ,
90+ },
8691 }
8792 default_logger .info ("params:" + str (params ))
8893 url = self .commentBaseUrl + urlencode (params )
@@ -148,7 +153,7 @@ def getData(
148153 default_logger .info (
149154 "爬取商品评价的 url 链接是" + url + ",商品的 id 是:" + id_
150155 )
151- response = requests .get (url , params = param )
156+ response = requests .get (url )
152157 except Exception as e :
153158 default_logger .warning (e )
154159 break
0 commit comments