@@ -142,7 +142,8 @@ async def request(self, method, url, **kwargs) -> Union[str, Any]:
142142 elif data ["code" ] == self .IP_ERROR_CODE :
143143 raise IPBlockError (self .IP_ERROR_STR )
144144 else :
145- raise DataFetchError (data .get ("msg" , None ))
145+ err_msg = data .get ("msg" , None ) or f"{ response .text } "
146+ raise DataFetchError (err_msg )
146147
147148 async def get (self , uri : str , params = None ) -> Dict :
148149 """
@@ -507,38 +508,40 @@ async def get_notes_by_creator(
507508 creator : str ,
508509 cursor : str ,
509510 page_size : int = 30 ,
511+ xsec_token : str = "" ,
512+ xsec_source : str = "pc_feed" ,
510513 ) -> Dict :
511514 """
512515 获取博主的笔记
513516 Args:
514517 creator: 博主ID
515518 cursor: 上一页最后一条笔记的ID
516519 page_size: 分页数据长度
520+ xsec_token: 验证token
521+ xsec_source: 渠道来源
517522
518523 Returns:
519524
520525 """
521- uri = "/api/sns/web/v1/user_posted"
522- data = {
523- "user_id" : creator ,
524- "cursor" : cursor ,
525- "num" : page_size ,
526- "image_formats" : "jpg,webp,avif" ,
527- }
528- return await self .get (uri , data )
526+ uri = f"/api/sns/web/v1/user_posted?num={ page_size } &cursor={ cursor } &user_id={ creator } &xsec_token={ xsec_token } &xsec_source={ xsec_source } "
527+ return await self .get (uri )
529528
530529 async def get_all_notes_by_creator (
531530 self ,
532531 user_id : str ,
533532 crawl_interval : float = 1.0 ,
534533 callback : Optional [Callable ] = None ,
534+ xsec_token : str = "" ,
535+ xsec_source : str = "pc_feed" ,
535536 ) -> List [Dict ]:
536537 """
537538 获取指定用户下的所有发过的帖子,该方法会一直查找一个用户下的所有帖子信息
538539 Args:
539540 user_id: 用户ID
540541 crawl_interval: 爬取一次的延迟单位(秒)
541542 callback: 一次分页爬取结束后的更新回调函数
543+ xsec_token: 验证token
544+ xsec_source: 渠道来源
542545
543546 Returns:
544547
@@ -547,7 +550,7 @@ async def get_all_notes_by_creator(
547550 notes_has_more = True
548551 notes_cursor = ""
549552 while notes_has_more and len (result ) < config .CRAWLER_MAX_NOTES_COUNT :
550- notes_res = await self .get_notes_by_creator (user_id , notes_cursor )
553+ notes_res = await self .get_notes_by_creator (user_id , notes_cursor , xsec_token = xsec_token , xsec_source = xsec_source )
551554 if not notes_res :
552555 utils .logger .error (
553556 f"[XiaoHongShuClient.get_notes_by_creator] The current creator may have been banned by xhs, so they cannot access the data."
0 commit comments