Skip to content

Commit b2d5291

Browse files
authored
Merge pull request #684 from 2513502304/main
添加抖音笔记图片的存储逻辑
2 parents 8ab1b7e + 214ccaa commit b2d5291

File tree

3 files changed

+40
-37
lines changed

3 files changed

+40
-37
lines changed

schema/sqlite_tables.sql

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,6 +150,7 @@ CREATE TABLE douyin_aweme (
150150
cover_url TEXT DEFAULT NULL,
151151
video_download_url TEXT DEFAULT NULL,
152152
music_download_url TEXT DEFAULT NULL,
153+
note_download_url TEXT DEFAULT NULL,
153154
source_keyword TEXT DEFAULT ''
154155
);
155156

@@ -565,4 +566,4 @@ CREATE TABLE zhihu_creator (
565566
last_modify_ts INTEGER NOT NULL
566567
);
567568

568-
CREATE UNIQUE INDEX idx_zhihu_creator_user_id ON zhihu_creator(user_id);
569+
CREATE UNIQUE INDEX idx_zhihu_creator_user_id ON zhihu_creator(user_id);

schema/tables.sql

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,6 +150,7 @@ CREATE TABLE `douyin_aweme`
150150
`cover_url` varchar(500) DEFAULT NULL COMMENT '视频封面图URL',
151151
`video_download_url` varchar(1024) DEFAULT NULL COMMENT '视频下载地址',
152152
`music_download_url` varchar(1024) DEFAULT NULL COMMENT '音乐下载地址',
153+
`note_download_url` varchar(5120) DEFAULT NULL COMMENT '笔记下载地址',
153154
PRIMARY KEY (`id`),
154155
KEY `idx_douyin_awem_aweme_i_6f7bc6` (`aweme_id`),
155156
KEY `idx_douyin_awem_create__299dfe` (`create_time`)
@@ -593,4 +594,4 @@ alter table douyin_aweme_comment add column `like_count` varchar(255) NOT NULL D
593594

594595
alter table xhs_note add column xsec_token varchar(50) default null comment '签名算法';
595596
alter table douyin_aweme_comment add column `pictures` varchar(500) NOT NULL DEFAULT '' COMMENT '评论图片列表';
596-
alter table bilibili_video_comment add column `like_count` varchar(255) NOT NULL DEFAULT '0' COMMENT '点赞数';
597+
alter table bilibili_video_comment add column `like_count` varchar(255) NOT NULL DEFAULT '0' COMMENT '点赞数';

store/douyin/__init__.py

Lines changed: 36 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
# 详细许可条款请参阅项目根目录下的LICENSE文件。
99
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
1010

11-
1211
# -*- coding: utf-8 -*-
1312
# @Author : [email protected]
1413
# @Time : 2024/1/14 18:46
@@ -26,19 +25,41 @@ class DouyinStoreFactory:
2625
"csv": DouyinCsvStoreImplement,
2726
"db": DouyinDbStoreImplement,
2827
"json": DouyinJsonStoreImplement,
29-
"sqlite": DouyinSqliteStoreImplement
28+
"sqlite": DouyinSqliteStoreImplement,
3029
}
3130

3231
@staticmethod
3332
def create_store() -> AbstractStore:
3433
store_class = DouyinStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
3534
if not store_class:
36-
raise ValueError(
37-
"[DouyinStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite ..."
38-
)
35+
raise ValueError("[DouyinStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite ...")
3936
return store_class()
4037

4138

39+
def _extract_note_image_list(aweme_detail: Dict) -> List[str]:
40+
"""
41+
提取笔记图片列表
42+
43+
Args:
44+
aweme_detail (Dict): 抖音内容详情
45+
46+
Returns:
47+
List[str]: 笔记图片列表
48+
"""
49+
images_res: List[str] = []
50+
images: List[Dict] = aweme_detail.get("images", [])
51+
52+
if not images:
53+
return []
54+
55+
for image in images:
56+
image_url_list = image.get("url_list", []) # download_url_list 为带水印的图片,url_list 为无水印的图片
57+
if image_url_list:
58+
images_res.append(image_url_list[-1])
59+
60+
return images_res
61+
62+
4263
def _extract_comment_image_list(comment_item: Dict) -> List[str]:
4364
"""
4465
提取评论图片列表
@@ -76,9 +97,7 @@ def _extract_content_cover_url(aweme_detail: Dict) -> str:
7697
res_cover_url = ""
7798

7899
video_item = aweme_detail.get("video", {})
79-
raw_cover_url_list = (
80-
video_item.get("raw_cover", {}) or video_item.get("origin_cover", {})
81-
).get("url_list", [])
100+
raw_cover_url_list = (video_item.get("raw_cover", {}) or video_item.get("origin_cover", {})).get("url_list", [])
82101
if raw_cover_url_list and len(raw_cover_url_list) > 1:
83102
res_cover_url = raw_cover_url_list[1]
84103

@@ -148,14 +167,11 @@ async def update_douyin_aweme(aweme_item: Dict):
148167
"cover_url": _extract_content_cover_url(aweme_item),
149168
"video_download_url": _extract_video_download_url(aweme_item),
150169
"music_download_url": _extract_music_download_url(aweme_item),
170+
"note_download_url": ",".join(_extract_note_image_list(aweme_item)),
151171
"source_keyword": source_keyword_var.get(),
152172
}
153-
utils.logger.info(
154-
f"[store.douyin.update_douyin_aweme] douyin aweme id:{aweme_id}, title:{save_content_item.get('title')}"
155-
)
156-
await DouyinStoreFactory.create_store().store_content(
157-
content_item=save_content_item
158-
)
173+
utils.logger.info(f"[store.douyin.update_douyin_aweme] douyin aweme id:{aweme_id}, title:{save_content_item.get('title')}")
174+
await DouyinStoreFactory.create_store().store_content(content_item=save_content_item)
159175

160176

161177
async def batch_update_dy_aweme_comments(aweme_id: str, comments: List[Dict]):
@@ -168,20 +184,12 @@ async def batch_update_dy_aweme_comments(aweme_id: str, comments: List[Dict]):
168184
async def update_dy_aweme_comment(aweme_id: str, comment_item: Dict):
169185
comment_aweme_id = comment_item.get("aweme_id")
170186
if aweme_id != comment_aweme_id:
171-
utils.logger.error(
172-
f"[store.douyin.update_dy_aweme_comment] comment_aweme_id: {comment_aweme_id} != aweme_id: {aweme_id}"
173-
)
187+
utils.logger.error(f"[store.douyin.update_dy_aweme_comment] comment_aweme_id: {comment_aweme_id} != aweme_id: {aweme_id}")
174188
return
175189
user_info = comment_item.get("user", {})
176190
comment_id = comment_item.get("cid")
177191
parent_comment_id = comment_item.get("reply_id", "0")
178-
avatar_info = (
179-
user_info.get("avatar_medium", {})
180-
or user_info.get("avatar_300x300", {})
181-
or user_info.get("avatar_168x168", {})
182-
or user_info.get("avatar_thumb", {})
183-
or {}
184-
)
192+
avatar_info = (user_info.get("avatar_medium", {}) or user_info.get("avatar_300x300", {}) or user_info.get("avatar_168x168", {}) or user_info.get("avatar_thumb", {}) or {})
185193
save_comment_item = {
186194
"comment_id": comment_id,
187195
"create_time": comment_item.get("create_time"),
@@ -196,20 +204,14 @@ async def update_dy_aweme_comment(aweme_id: str, comment_item: Dict):
196204
"nickname": user_info.get("nickname"),
197205
"avatar": avatar_info.get("url_list", [""])[0],
198206
"sub_comment_count": str(comment_item.get("reply_comment_total", 0)),
199-
"like_count": (
200-
comment_item.get("digg_count") if comment_item.get("digg_count") else 0
201-
),
207+
"like_count": (comment_item.get("digg_count") if comment_item.get("digg_count") else 0),
202208
"last_modify_ts": utils.get_current_timestamp(),
203209
"parent_comment_id": parent_comment_id,
204210
"pictures": ",".join(_extract_comment_image_list(comment_item)),
205211
}
206-
utils.logger.info(
207-
f"[store.douyin.update_dy_aweme_comment] douyin aweme comment: {comment_id}, content: {save_comment_item.get('content')}"
208-
)
212+
utils.logger.info(f"[store.douyin.update_dy_aweme_comment] douyin aweme comment: {comment_id}, content: {save_comment_item.get('content')}")
209213

210-
await DouyinStoreFactory.create_store().store_comment(
211-
comment_item=save_comment_item
212-
)
214+
await DouyinStoreFactory.create_store().store_comment(comment_item=save_comment_item)
213215

214216

215217
async def save_creator(user_id: str, creator: Dict):
@@ -220,8 +222,7 @@ async def save_creator(user_id: str, creator: Dict):
220222
"user_id": user_id,
221223
"nickname": user_info.get("nickname"),
222224
"gender": gender_map.get(user_info.get("gender"), "未知"),
223-
"avatar": f"https://p3-pc.douyinpic.com/img/{avatar_uri}"
224-
+ r"~c5_300x300.jpeg?from=2956013662",
225+
"avatar": f"https://p3-pc.douyinpic.com/img/{avatar_uri}" + r"~c5_300x300.jpeg?from=2956013662",
225226
"desc": user_info.get("signature"),
226227
"ip_location": user_info.get("ip_location"),
227228
"follows": user_info.get("following_count", 0),

0 commit comments

Comments
 (0)