@@ -62,6 +62,48 @@ def _extract_comment_image_list(comment_item: Dict) -> List[str]:
6262 return images_res
6363
6464
65+ def _extract_content_cover_url (aweme_detail : Dict ) -> str :
66+ """
67+ 提取视频封面地址
68+
69+ Args:
70+ aweme_detail (Dict): 抖音内容详情
71+
72+ Returns:
73+ str: 视频封面地址
74+ """
75+ res_cover_url = ""
76+
77+ video_item = aweme_detail .get ("video" , {})
78+ raw_cover_url_list = (
79+ video_item .get ("raw_cover" , {}) or video_item .get ("origin_cover" , {})
80+ ).get ("url_list" , [])
81+ if raw_cover_url_list and len (raw_cover_url_list ) > 1 :
82+ res_cover_url = raw_cover_url_list [1 ]
83+
84+ return res_cover_url
85+
86+
87+ def _extract_video_download_url (aweme_detail : Dict ) -> str :
88+ """
89+ 提取视频下载地址
90+
91+ Args:
92+ aweme_detail (Dict): 抖音视频
93+
94+ Returns:
95+ str: 视频下载地址
96+ """
97+ video_item = aweme_detail .get ("video" , {})
98+ url_h264_list = video_item .get ("play_addr_h264" , {}).get ("url_list" , [])
99+ url_256_list = video_item .get ("play_addr_256" , {}).get ("url_list" , [])
100+ url_list = video_item .get ("play_addr" , {}).get ("url_list" , [])
101+ actual_url_list = url_h264_list or url_256_list or url_list
102+ if not actual_url_list or len (actual_url_list ) < 2 :
103+ return ""
104+ return actual_url_list [- 1 ]
105+
106+
65107async def update_douyin_aweme (aweme_item : Dict ):
66108 aweme_id = aweme_item .get ("aweme_id" )
67109 user_info = aweme_item .get ("author" , {})
@@ -86,6 +128,8 @@ async def update_douyin_aweme(aweme_item: Dict):
86128 "ip_location" : aweme_item .get ("ip_label" , "" ),
87129 "last_modify_ts" : utils .get_current_timestamp (),
88130 "aweme_url" : f"https://www.douyin.com/video/{ aweme_id } " ,
131+ "cover_url" : _extract_content_cover_url (aweme_item ),
132+ "video_download_url" : _extract_video_download_url (aweme_item ),
89133 "source_keyword" : source_keyword_var .get (),
90134 }
91135 utils .logger .info (
@@ -114,11 +158,11 @@ async def update_dy_aweme_comment(aweme_id: str, comment_item: Dict):
114158 comment_id = comment_item .get ("cid" )
115159 parent_comment_id = comment_item .get ("reply_id" , "0" )
116160 avatar_info = (
117- user_info .get ("avatar_medium" , {})
118- or user_info .get ("avatar_300x300" , {})
119- or user_info .get ("avatar_168x168" , {})
120- or user_info .get ("avatar_thumb" , {})
121- or {}
161+ user_info .get ("avatar_medium" , {})
162+ or user_info .get ("avatar_300x300" , {})
163+ or user_info .get ("avatar_168x168" , {})
164+ or user_info .get ("avatar_thumb" , {})
165+ or {}
122166 )
123167 save_comment_item = {
124168 "comment_id" : comment_id ,
@@ -159,7 +203,7 @@ async def save_creator(user_id: str, creator: Dict):
159203 "nickname" : user_info .get ("nickname" ),
160204 "gender" : gender_map .get (user_info .get ("gender" ), "未知" ),
161205 "avatar" : f"https://p3-pc.douyinpic.com/img/{ avatar_uri } "
162- + r"~c5_300x300.jpeg?from=2956013662" ,
206+ + r"~c5_300x300.jpeg?from=2956013662" ,
163207 "desc" : user_info .get ("signature" ),
164208 "ip_location" : user_info .get ("ip_location" ),
165209 "follows" : user_info .get ("following_count" , 0 ),
0 commit comments