@@ -249,7 +249,7 @@ def _save_cache(self):
249249 print (f"保存缓存文件失败: { e } " )
250250
251251 def get (self , story_id ):
252- """获取缓存的故事"""
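252+ """Return the cached story, or None when story_id is not in the cache. The returned dict carries needs_comment_update, which is always initialised to False here; the caller compares comment counts and decides whether the comment summary must be regenerated."""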
253253 if str (story_id ) not in self .cache :
254254 return None
255255
@@ -265,6 +265,10 @@ def get(self, story_id):
265265 # 转换时间格式
266266 if "data" in story :
267267 story ["data" ]["time" ] = datetime .fromisoformat (story ["data" ]["time" ])
268+
269+ # 添加一个标志,表示是否需要更新评论摘要
270+ story ["needs_comment_update" ] = False
271+
268272 return story
269273
270274 def set (
@@ -274,6 +278,7 @@ def set(
274278 article_content = None ,
275279 article_summary = None ,
276280 comments_summary = None ,
281+ comments_count = 0 , # 新增参数:评论数量
277282 ):
278283 """缓存故事数据"""
279284 # 设置新数据前先清理过期缓存
@@ -285,6 +290,7 @@ def set(
285290 "article_content" : article_content ,
286291 "article_summary" : article_summary ,
287292 "comments_summary" : comments_summary ,
293+ "comments_count" : comments_count , # 保存评论数量
288294 "cache_time" : datetime .now ().isoformat (),
289295 }
290296 self ._save_cache ()
@@ -348,22 +354,48 @@ def fetch_top_stories():
348354 try :
349355 print (f"正在处理第 { i } /100 个故事 (ID: { story_id } )..." )
350356
351- # 检查缓存
352- cached_data = cache .get (story_id )
353- if cached_data :
354- print (f"使用缓存的故事数据 (ID: { story_id } )" )
355- stories .append (cached_data ["data" ])
356- continue
357-
358- # 如果没有缓存,获取新数据
357+ # 获取故事数据(无论是否缓存)
359358 story = fetch_hn_item (story_id )
360359 if not story :
361360 continue
362361
362+ # 获取当前评论数量
363+ current_comments_count = len (story .get ("kids" , []))
364+
365+ # 检查缓存
366+ cached_data = cache .get (story_id )
367+
368+ # 判断是否需要更新评论摘要
369+ need_update_comments = False
370+
371+ if cached_data :
372+ # 检查评论数量是否增加且原数量小于20
373+ cached_comments_count = cached_data .get ("comments_count" , 0 )
374+ if (
375+ cached_comments_count < 20
376+ and current_comments_count > cached_comments_count
377+ ):
378+ print (
379+ f"评论数量从 { cached_comments_count } 增加到 { current_comments_count } ,将重新生成摘要"
380+ )
381+ need_update_comments = True
382+ else :
383+ print (f"使用缓存的故事数据 (ID: { story_id } )" )
384+ stories .append (cached_data ["data" ])
385+ continue
386+
363387 # 获取文章内容并生成摘要
364388 article_content = None
365389 article_summary = "无法获取文章内容"
366- if "url" in story :
390+
391+ # 如果有缓存且只需更新评论,复用文章内容和摘要
392+ if cached_data and need_update_comments :
393+ article_content = cached_data .get ("article_content" )
394+ article_summary = cached_data ["data" ].get (
395+ "article_summary" , "无法获取文章内容"
396+ )
397+ # 否则获取新的文章内容和摘要
398+ elif "url" in story :
367399 print (f"获取文章内容: { story ['url' ]} " )
368400 article_content = get_article_content (story ["url" ])
369401 if article_content :
@@ -372,26 +404,32 @@ def fetch_top_stories():
372404 "请用中文简明扼要地总结这篇文章的主要内容,限制在200字以内。" ,
373405 )
374406
375- # 获取评论文本
376- print (f"获取评论内容..." )
377- comments_texts = []
378- if "kids" in story :
379- for comment_id in story ["kids" ][:15 ]:
380- comment = fetch_hn_item (comment_id )
381- if (
382- comment
383- and not comment .get ("deleted" )
384- and not comment .get ("dead" )
385- ):
386- clean_text = clean_html_text (comment .get ("text" , "" ))
387- if clean_text :
388- author = comment .get ("by" , "匿名" )
389- comments_texts .append (f"[{ author } ]: { clean_text } " )
390-
391- comments_text = "\n \n ---\n \n " .join (comments_texts )
407+ # 获取评论文本 - 如果缓存需要更新或无缓存
392408 comments_summary = "暂无评论"
393- if comments_text :
394- comments_summary = get_summary (comments_text , comments_prompt )
409+ if need_update_comments or not cached_data :
410+ print (f"获取评论内容..." )
411+ comments_texts = []
412+ if "kids" in story :
413+ for comment_id in story ["kids" ][:15 ]:
414+ comment = fetch_hn_item (comment_id )
415+ if (
416+ comment
417+ and not comment .get ("deleted" )
418+ and not comment .get ("dead" )
419+ ):
420+ clean_text = clean_html_text (comment .get ("text" , "" ))
421+ if clean_text :
422+ author = comment .get ("by" , "匿名" )
423+ comments_texts .append (f"[{ author } ]: { clean_text } " )
424+
425+ comments_text = "\n \n ---\n \n " .join (comments_texts )
426+ if comments_text :
427+ comments_summary = get_summary (comments_text , comments_prompt )
428+ else :
429+ # 使用缓存的评论摘要
430+ comments_summary = cached_data ["data" ].get (
431+ "comments_summary" , "暂无评论"
432+ )
395433
396434 story_data = {
397435 "title" : story .get ("title" , "无标题" ),
@@ -401,7 +439,7 @@ def fetch_top_stories():
401439 "author" : story .get ("by" , "匿名" ),
402440 "score" : story .get ("score" , 0 ),
403441 "time" : datetime .fromtimestamp (story .get ("time" , 0 )).isoformat (),
404- "comments_count" : len ( story . get ( "kids" , [])) ,
442+ "comments_count" : current_comments_count ,
405443 "article_summary" : article_summary ,
406444 "comments_summary" : comments_summary ,
407445 "comments_url" : f"https://news.ycombinator.com/item?id={ story_id } " ,
@@ -414,6 +452,7 @@ def fetch_top_stories():
414452 article_content = article_content ,
415453 article_summary = article_summary ,
416454 comments_summary = comments_summary ,
455+ comments_count = current_comments_count , # 保存当前评论数量
417456 )
418457
419458 # 转换时间格式以适应模板
0 commit comments