@@ -87,27 +87,27 @@ async def update_xhs_note(note_item: Dict):
8787 video_url = ',' .join (get_video_url_arr (note_item ))
8888
8989 local_db_item = {
90- "note_id" : note_item .get ("note_id" ),
91- "type" : note_item .get ("type" ),
92- "title" : note_item .get ("title" ) or note_item .get ("desc" , "" )[:255 ],
93- "desc" : note_item .get ("desc" , "" ),
94- "video_url" : video_url ,
95- "time" : note_item .get ("time" ),
96- "last_update_time" : note_item .get ("last_update_time" , 0 ),
97- "user_id" : user_info .get ("user_id" ),
98- "nickname" : user_info .get ("nickname" ),
99- "avatar" : user_info .get ("avatar" ),
100- "liked_count" : interact_info .get ("liked_count" ),
101- "collected_count" : interact_info .get ("collected_count" ),
102- "comment_count" : interact_info .get ("comment_count" ),
103- "share_count" : interact_info .get ("share_count" ),
104- "ip_location" : note_item .get ("ip_location" , "" ),
105- "image_list" : ',' .join ([img .get ('url' , '' ) for img in image_list ]),
106- "tag_list" : ',' .join ([tag .get ('name' , '' ) for tag in tag_list if tag .get ('type' ) == 'topic' ]),
107- "last_modify_ts" : utils .get_current_timestamp (),
108- "note_url" : f"https://www.xiaohongshu.com/explore/{ note_id } ?xsec_token={ note_item .get ('xsec_token' )} &xsec_source=pc_search" ,
109- "source_keyword" : source_keyword_var .get (),
110- "xsec_token" : note_item .get ("xsec_token" ),
90+ "note_id" : note_item .get ("note_id" ), # 帖子id
91+ "type" : note_item .get ("type" ), # 帖子类型
92+ "title" : note_item .get ("title" ) or note_item .get ("desc" , "" )[:255 ], # 帖子标题
93+ "desc" : note_item .get ("desc" , "" ), # 帖子描述
94+ "video_url" : video_url , # 帖子视频url
95+ "time" : note_item .get ("time" ), # 帖子发布时间
96+ "last_update_time" : note_item .get ("last_update_time" , 0 ), # 帖子最后更新时间
97+ "user_id" : user_info .get ("user_id" ), # 用户id
98+ "nickname" : user_info .get ("nickname" ), # 用户昵称
99+ "avatar" : user_info .get ("avatar" ), # 用户头像
100+ "liked_count" : interact_info .get ("liked_count" ), # 点赞数
101+ "collected_count" : interact_info .get ("collected_count" ), # 收藏数
102+ "comment_count" : interact_info .get ("comment_count" ), # 评论数
103+ "share_count" : interact_info .get ("share_count" ), # 分享数
104+ "ip_location" : note_item .get ("ip_location" , "" ), # ip地址
105+ "image_list" : ',' .join ([img .get ('url' , '' ) for img in image_list ]), # 图片url
106+ "tag_list" : ',' .join ([tag .get ('name' , '' ) for tag in tag_list if tag .get ('type' ) == 'topic' ]), # 标签
107+ "last_modify_ts" : utils .get_current_timestamp (), # 最后更新时间戳(MediaCrawler程序生成的,主要用途在db存储的时候记录一条记录最新更新时间)
108+ "note_url" : f"https://www.xiaohongshu.com/explore/{ note_id } ?xsec_token={ note_item .get ('xsec_token' )} &xsec_source=pc_search" , # 帖子url
109+ "source_keyword" : source_keyword_var .get (), # 搜索关键词
110+ "xsec_token" : note_item .get ("xsec_token" ), # xsec_token
111111 }
112112 utils .logger .info (f"[store.xhs.update_xhs_note] xhs note: { local_db_item } " )
113113 await XhsStoreFactory .create_store ().store_content (local_db_item )
@@ -144,18 +144,18 @@ async def update_xhs_note_comment(note_id: str, comment_item: Dict):
144144 comment_pictures = [item .get ("url_default" , "" ) for item in comment_item .get ("pictures" , [])]
145145 target_comment = comment_item .get ("target_comment" , {})
146146 local_db_item = {
147- "comment_id" : comment_id ,
148- "create_time" : comment_item .get ("create_time" ),
149- "ip_location" : comment_item .get ("ip_location" ),
150- "note_id" : note_id ,
151- "content" : comment_item .get ("content" ),
152- "user_id" : user_info .get ("user_id" ),
153- "nickname" : user_info .get ("nickname" ),
154- "avatar" : user_info .get ("image" ),
155- "sub_comment_count" : comment_item .get ("sub_comment_count" , 0 ),
156- "pictures" : "," .join (comment_pictures ),
157- "parent_comment_id" : target_comment .get ("id" , 0 ),
158- "last_modify_ts" : utils .get_current_timestamp (),
147+ "comment_id" : comment_id , # 评论id
148+ "create_time" : comment_item .get ("create_time" ), # 评论时间
149+ "ip_location" : comment_item .get ("ip_location" ), # ip地址
150+ "note_id" : note_id , # 帖子id
151+ "content" : comment_item .get ("content" ), # 评论内容
152+ "user_id" : user_info .get ("user_id" ), # 用户id
153+ "nickname" : user_info .get ("nickname" ), # 用户昵称
154+ "avatar" : user_info .get ("image" ), # 用户头像
155+ "sub_comment_count" : comment_item .get ("sub_comment_count" , 0 ), # 子评论数
156+ "pictures" : "," .join (comment_pictures ), # 评论图片
157+ "parent_comment_id" : target_comment .get ("id" , 0 ), # 父评论id
158+ "last_modify_ts" : utils .get_current_timestamp (), # 最后更新时间戳(MediaCrawler程序生成的,主要用途在db存储的时候记录一条记录最新更新时间)
159159 "like_count" : comment_item .get ("like_count" , 0 ),
160160 }
161161 utils .logger .info (f"[store.xhs.update_xhs_note_comment] xhs note comment:{ local_db_item } " )
@@ -186,18 +186,18 @@ async def save_creator(user_id: str, creator: Dict):
186186 interaction = i .get ('count' )
187187
188188 local_db_item = {
189- 'user_id' : user_id ,
190- 'nickname' : user_info .get ('nickname' ),
191- 'gender' : '女' if user_info .get ('gender' ) == 1 else '男' ,
192- 'avatar' : user_info .get ('images' ),
193- 'desc' : user_info .get ('desc' ),
194- 'ip_location' : user_info .get ('ipLocation' ),
195- 'follows' : follows ,
196- 'fans' : fans ,
197- 'interaction' : interaction ,
189+ 'user_id' : user_id , # 用户id
190+ 'nickname' : user_info .get ('nickname' ), # 昵称
191+ 'gender' : '女' if user_info .get ('gender' ) == 1 else '男' , # 性别
192+ 'avatar' : user_info .get ('images' ), # 头像
193+ 'desc' : user_info .get ('desc' ), # 个人描述
194+ 'ip_location' : user_info .get ('ipLocation' ), # ip地址
195+ 'follows' : follows , # 关注数
196+ 'fans' : fans , # 粉丝数
197+ 'interaction' : interaction , # 互动数
198198 'tag_list' : json .dumps ({tag .get ('tagType' ): tag .get ('name' ) for tag in creator .get ('tags' )},
199- ensure_ascii = False ),
200- "last_modify_ts" : utils .get_current_timestamp (),
199+ ensure_ascii = False ), # 标签
200+ "last_modify_ts" : utils .get_current_timestamp (), # 最后更新时间戳(MediaCrawler程序生成的,主要用途在db存储的时候记录一条记录最新更新时间)
201201 }
202202 utils .logger .info (f"[store.xhs.save_creator] creator:{ local_db_item } " )
203203 await XhsStoreFactory .create_store ().store_creator (local_db_item )
0 commit comments