Skip to content

Commit d9c78d3

Browse files
committed
更新定时任务频率为每三小时一次,并优化评论摘要更新逻辑,增加评论数量的缓存和判断机制。
1 parent bbd84e1 commit d9c78d3

File tree

2 files changed: 70 additions and 31 deletions.

.github/workflows/fetch-comments.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: Fetch HackerNews Stories
22

33
on:
44
schedule:
5-
- cron: "0 0,4,9,13 * * *" # 北京时间 8:00, 12:00, 17:00, 21:00
5+
- cron: "0 */3 * * *" # 每三个小时一次
66
workflow_dispatch: # 允许手动触发
77

88
permissions:

scripts/fetch_hn.py

Lines changed: 69 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def _save_cache(self):
249249
print(f"保存缓存文件失败: {e}")
250250

251251
def get(self, story_id):
252-
"""获取缓存的故事"""
252+
"""获取缓存的故事,如果评论数量增加且原数量小于20,返回需要更新摘要的标志"""
253253
if str(story_id) not in self.cache:
254254
return None
255255

@@ -265,6 +265,10 @@ def get(self, story_id):
265265
# 转换时间格式
266266
if "data" in story:
267267
story["data"]["time"] = datetime.fromisoformat(story["data"]["time"])
268+
269+
# 添加一个标志,表示是否需要更新评论摘要
270+
story["needs_comment_update"] = False
271+
268272
return story
269273

270274
def set(
@@ -274,6 +278,7 @@ def set(
274278
article_content=None,
275279
article_summary=None,
276280
comments_summary=None,
281+
comments_count=0, # 新增参数:评论数量
277282
):
278283
"""缓存故事数据"""
279284
# 设置新数据前先清理过期缓存
@@ -285,6 +290,7 @@ def set(
285290
"article_content": article_content,
286291
"article_summary": article_summary,
287292
"comments_summary": comments_summary,
293+
"comments_count": comments_count, # 保存评论数量
288294
"cache_time": datetime.now().isoformat(),
289295
}
290296
self._save_cache()
@@ -348,22 +354,48 @@ def fetch_top_stories():
348354
try:
349355
print(f"正在处理第 {i}/100 个故事 (ID: {story_id})...")
350356

351-
# 检查缓存
352-
cached_data = cache.get(story_id)
353-
if cached_data:
354-
print(f"使用缓存的故事数据 (ID: {story_id})")
355-
stories.append(cached_data["data"])
356-
continue
357-
358-
# 如果没有缓存,获取新数据
357+
# 获取故事数据(无论是否缓存)
359358
story = fetch_hn_item(story_id)
360359
if not story:
361360
continue
362361

362+
# 获取当前评论数量
363+
current_comments_count = len(story.get("kids", []))
364+
365+
# 检查缓存
366+
cached_data = cache.get(story_id)
367+
368+
# 判断是否需要更新评论摘要
369+
need_update_comments = False
370+
371+
if cached_data:
372+
# 检查评论数量是否增加且原数量小于20
373+
cached_comments_count = cached_data.get("comments_count", 0)
374+
if (
375+
cached_comments_count < 20
376+
and current_comments_count > cached_comments_count
377+
):
378+
print(
379+
f"评论数量从 {cached_comments_count} 增加到 {current_comments_count},将重新生成摘要"
380+
)
381+
need_update_comments = True
382+
else:
383+
print(f"使用缓存的故事数据 (ID: {story_id})")
384+
stories.append(cached_data["data"])
385+
continue
386+
363387
# 获取文章内容并生成摘要
364388
article_content = None
365389
article_summary = "无法获取文章内容"
366-
if "url" in story:
390+
391+
# 如果有缓存且只需更新评论,复用文章内容和摘要
392+
if cached_data and need_update_comments:
393+
article_content = cached_data.get("article_content")
394+
article_summary = cached_data["data"].get(
395+
"article_summary", "无法获取文章内容"
396+
)
397+
# 否则获取新的文章内容和摘要
398+
elif "url" in story:
367399
print(f"获取文章内容: {story['url']}")
368400
article_content = get_article_content(story["url"])
369401
if article_content:
@@ -372,26 +404,32 @@ def fetch_top_stories():
372404
"请用中文简明扼要地总结这篇文章的主要内容,限制在200字以内。",
373405
)
374406

375-
# 获取评论文本
376-
print(f"获取评论内容...")
377-
comments_texts = []
378-
if "kids" in story:
379-
for comment_id in story["kids"][:15]:
380-
comment = fetch_hn_item(comment_id)
381-
if (
382-
comment
383-
and not comment.get("deleted")
384-
and not comment.get("dead")
385-
):
386-
clean_text = clean_html_text(comment.get("text", ""))
387-
if clean_text:
388-
author = comment.get("by", "匿名")
389-
comments_texts.append(f"[{author}]: {clean_text}")
390-
391-
comments_text = "\n\n---\n\n".join(comments_texts)
407+
# 获取评论文本 - 如果缓存需要更新或无缓存
392408
comments_summary = "暂无评论"
393-
if comments_text:
394-
comments_summary = get_summary(comments_text, comments_prompt)
409+
if need_update_comments or not cached_data:
410+
print(f"获取评论内容...")
411+
comments_texts = []
412+
if "kids" in story:
413+
for comment_id in story["kids"][:15]:
414+
comment = fetch_hn_item(comment_id)
415+
if (
416+
comment
417+
and not comment.get("deleted")
418+
and not comment.get("dead")
419+
):
420+
clean_text = clean_html_text(comment.get("text", ""))
421+
if clean_text:
422+
author = comment.get("by", "匿名")
423+
comments_texts.append(f"[{author}]: {clean_text}")
424+
425+
comments_text = "\n\n---\n\n".join(comments_texts)
426+
if comments_text:
427+
comments_summary = get_summary(comments_text, comments_prompt)
428+
else:
429+
# 使用缓存的评论摘要
430+
comments_summary = cached_data["data"].get(
431+
"comments_summary", "暂无评论"
432+
)
395433

396434
story_data = {
397435
"title": story.get("title", "无标题"),
@@ -401,7 +439,7 @@ def fetch_top_stories():
401439
"author": story.get("by", "匿名"),
402440
"score": story.get("score", 0),
403441
"time": datetime.fromtimestamp(story.get("time", 0)).isoformat(),
404-
"comments_count": len(story.get("kids", [])),
442+
"comments_count": current_comments_count,
405443
"article_summary": article_summary,
406444
"comments_summary": comments_summary,
407445
"comments_url": f"https://news.ycombinator.com/item?id={story_id}",
@@ -414,6 +452,7 @@ def fetch_top_stories():
414452
article_content=article_content,
415453
article_summary=article_summary,
416454
comments_summary=comments_summary,
455+
comments_count=current_comments_count, # 保存当前评论数量
417456
)
418457

419458
# 转换时间格式以适应模板

0 commit comments

Comments
 (0)