1- # 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
2- # 1. 不得用于任何商业用途。
3- # 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
4- # 3. 不得进行大规模爬取或对平台造成运营干扰。
5- # 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
1+ # 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
2+ # 1. 不得用于任何商业用途。
3+ # 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
4+ # 3. 不得进行大规模爬取或对平台造成运营干扰。
5+ # 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
66# 5. 不得用于任何非法或不当的用途。
7- #
8- # 详细许可条款请参阅项目根目录下的LICENSE文件。
9- # 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
7+ #
8+ # 详细许可条款请参阅项目根目录下的LICENSE文件。
9+ # 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
1010
1111
1212# -*- coding: utf-8 -*-
@@ -25,18 +25,43 @@ class DouyinStoreFactory:
2525 STORES = {
2626 "csv" : DouyinCsvStoreImplement ,
2727 "db" : DouyinDbStoreImplement ,
28- "json" : DouyinJsonStoreImplement
28+ "json" : DouyinJsonStoreImplement ,
2929 }
3030
@staticmethod
def create_store() -> AbstractStore:
    """
    Instantiate the storage backend selected by ``config.SAVE_DATA_OPTION``.

    Returns:
        AbstractStore: A new instance of the class registered in
        ``DouyinStoreFactory.STORES`` under the configured option.

    Raises:
        ValueError: If ``STORES`` has no usable entry for the configured
            option.
    """
    selected_store = DouyinStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
    if selected_store:
        return selected_store()
    raise ValueError(
        "[DouyinStoreFactory.create_store] Invalid save option only supported csv or db or json ..."
    )
3839
3940
41+ def _extract_comment_image_list (comment_item : Dict ) -> List [str ]:
42+ """
43+ 提取评论图片列表
44+
45+ Args:
46+ comment_item (Dict): 抖音评论
47+
48+ Returns:
49+ List[str]: 评论图片列表
50+ """
51+ images_res : List [str ] = []
52+ image_list : List [Dict ] = comment_item .get ("image_list" , [])
53+
54+ if not image_list :
55+ return []
56+
57+ for image in image_list :
58+ image_url_list = image .get ("origin_url" , {}).get ("url_list" , [])
59+ if image_url_list and len (image_url_list ) > 1 :
60+ images_res .append (image_url_list [1 ])
61+
62+ return images_res
63+
64+
4065async def update_douyin_aweme (aweme_item : Dict ):
4166 aweme_id = aweme_item .get ("aweme_id" )
4267 user_info = aweme_item .get ("author" , {})
@@ -64,8 +89,11 @@ async def update_douyin_aweme(aweme_item: Dict):
6489 "source_keyword" : source_keyword_var .get (),
6590 }
6691 utils .logger .info (
67- f"[store.douyin.update_douyin_aweme] douyin aweme id:{ aweme_id } , title:{ save_content_item .get ('title' )} " )
68- await DouyinStoreFactory .create_store ().store_content (content_item = save_content_item )
92+ f"[store.douyin.update_douyin_aweme] douyin aweme id:{ aweme_id } , title:{ save_content_item .get ('title' )} "
93+ )
94+ await DouyinStoreFactory .create_store ().store_content (
95+ content_item = save_content_item
96+ )
6997
7098
7199async def batch_update_dy_aweme_comments (aweme_id : str , comments : List [Dict ]):
@@ -79,13 +107,19 @@ async def update_dy_aweme_comment(aweme_id: str, comment_item: Dict):
79107 comment_aweme_id = comment_item .get ("aweme_id" )
80108 if aweme_id != comment_aweme_id :
81109 utils .logger .error (
82- f"[store.douyin.update_dy_aweme_comment] comment_aweme_id: { comment_aweme_id } != aweme_id: { aweme_id } " )
110+ f"[store.douyin.update_dy_aweme_comment] comment_aweme_id: { comment_aweme_id } != aweme_id: { aweme_id } "
111+ )
83112 return
84113 user_info = comment_item .get ("user" , {})
85114 comment_id = comment_item .get ("cid" )
86115 parent_comment_id = comment_item .get ("reply_id" , "0" )
87- avatar_info = user_info .get ("avatar_medium" , {}) or user_info .get ("avatar_300x300" , {}) or user_info .get (
88- "avatar_168x168" , {}) or user_info .get ("avatar_thumb" , {}) or {}
116+ avatar_info = (
117+ user_info .get ("avatar_medium" , {})
118+ or user_info .get ("avatar_300x300" , {})
119+ or user_info .get ("avatar_168x168" , {})
120+ or user_info .get ("avatar_thumb" , {})
121+ or {}
122+ )
89123 save_comment_item = {
90124 "comment_id" : comment_id ,
91125 "create_time" : comment_item .get ("create_time" ),
@@ -100,39 +134,39 @@ async def update_dy_aweme_comment(aweme_id: str, comment_item: Dict):
100134 "nickname" : user_info .get ("nickname" ),
101135 "avatar" : avatar_info .get ("url_list" , ["" ])[0 ],
102136 "sub_comment_count" : str (comment_item .get ("reply_comment_total" , 0 )),
103- "like_count" : comment_item .get ("digg_count" ) if comment_item .get ("digg_count" ) else 0 ,
137+ "like_count" : (
138+ comment_item .get ("digg_count" ) if comment_item .get ("digg_count" ) else 0
139+ ),
104140 "last_modify_ts" : utils .get_current_timestamp (),
105- "parent_comment_id" : parent_comment_id
141+ "parent_comment_id" : parent_comment_id ,
142+ "pictures" : "," .join (_extract_comment_image_list (comment_item )),
106143 }
107144 utils .logger .info (
108- f"[store.douyin.update_dy_aweme_comment] douyin aweme comment: { comment_id } , content: { save_comment_item .get ('content' )} " )
109-
110- await DouyinStoreFactory .create_store ().store_comment (comment_item = save_comment_item )
111-
145+ f"[store.douyin.update_dy_aweme_comment] douyin aweme comment: { comment_id } , content: { save_comment_item .get ('content' )} "
146+ )
112147
148+ await DouyinStoreFactory .create_store ().store_comment (
149+ comment_item = save_comment_item
150+ )
113151
114152
async def save_creator(user_id: str, creator: Dict):
    """
    Build a creator record from a Douyin creator payload and hand it to the
    configured store.

    Args:
        user_id (str): Identifier written to the record's ``user_id`` field.
        creator (Dict): Creator payload; profile fields are read from its
            ``"user"`` entry.

    Returns:
        None. The record is logged and then passed to ``store_creator`` of
        the store returned by ``DouyinStoreFactory.create_store()``.
    """
    # `or {}` also covers keys that are present but set to None, which the
    # default argument of .get() would not.
    user_info = creator.get("user") or {}
    # Gender codes missing from this map are stored as "未知".
    gender_map = {0: "未知", 1: "男", 2: "女"}
    avatar_uri = (user_info.get("avatar_300x300") or {}).get("uri")
    # Build the avatar URL only when a URI exists; otherwise the text "None"
    # would be interpolated into the stored link.
    if avatar_uri:
        avatar = (
            f"https://p3-pc.douyinpic.com/img/{avatar_uri}"
            + r"~c5_300x300.jpeg?from=2956013662"
        )
    else:
        avatar = ""
    local_db_item = {
        "user_id": user_id,
        "nickname": user_info.get("nickname"),
        "gender": gender_map.get(user_info.get("gender"), "未知"),
        "avatar": avatar,
        "desc": user_info.get("signature"),
        "ip_location": user_info.get("ip_location"),
        "follows": user_info.get("following_count", 0),
        "fans": user_info.get("max_follower_count", 0),
        "interaction": user_info.get("total_favorited", 0),
        "videos_count": user_info.get("aweme_count", 0),
        "last_modify_ts": utils.get_current_timestamp(),
    }
    utils.logger.info(f"[store.douyin.save_creator] creator:{local_db_item} ")
    await DouyinStoreFactory.create_store().store_creator(local_db_item)
0 commit comments