Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 1b1b443

Browse files
authored
FEATURE: Changes to summaries' outdated logic. (#1108)
Before this change, a summary was only outdated when new content appeared, for topics with "best replies", when the query returned different results. The intent behind this change is to detect when a summary is outdated as a result of an edit. Additionally, we are changing the backfill candidates query to compare "ai_summary_backfill_topic_max_age_days" against "last_posted_at" instead of "created_at", to catch long-lived, active topics. This was discussed here: https://meta.discourse.org/t/ai-summarization-backfill-is-stuck-keeps-regenerating-the-same-topic/347088/14?u=roman_rizzi
1 parent d3b93f9 commit 1b1b443

File tree

7 files changed

+58
-20
lines changed

7 files changed

+58
-20
lines changed

app/jobs/scheduled/summaries_backfill.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def backfill_candidates(summary_type)
5757
ais.target_type = 'Topic' AND
5858
ais.summary_type = '#{summary_type}'
5959
SQL
60-
.where("topics.created_at > current_timestamp - INTERVAL '#{max_age_days.to_i} DAY'")
60+
.where("topics.last_posted_at > current_timestamp - INTERVAL '#{max_age_days.to_i} DAY'")
6161
.where(
6262
<<~SQL, # (1..1) gets stored ad (1..2).
6363
ais.id IS NULL OR (

lib/summarization/fold_content.rb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,9 @@ def existing_summary
5858
if summary
5959
@existing_summary = summary
6060

61-
if existing_summary.original_content_sha != latest_sha
62-
@existing_summary.mark_as_outdated
61+
if summary.original_content_sha != latest_sha ||
62+
content_to_summarize.any? { |cts| cts[:last_version_at] > summary.updated_at }
63+
summary.mark_as_outdated
6364
end
6465
end
6566
end

lib/summarization/strategies/chat_messages.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ def targets_data
2323
.where("chat_messages.created_at > ?", since.hours.ago)
2424
.includes(:user)
2525
.order(created_at: :asc)
26-
.pluck(:id, :username_lower, :message)
27-
.map { { id: _1, poster: _2, text: _3 } }
26+
.pluck(:id, :username_lower, :message, :updated_at)
27+
.map { { id: _1, poster: _2, text: _3, last_version_at: _4 } }
2828
end
2929

3030
def summary_extension_prompt(summary, contents)

lib/summarization/strategies/hot_topic_gists.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,16 @@ def targets_data
4949
.joins(:user)
5050
.where("post_number IN (?)", recent_hot_posts << op_post_number)
5151
.order(:post_number)
52-
.pluck(:post_number, :raw, :username)
52+
.pluck(:post_number, :raw, :username, :last_version_at)
5353

54-
posts_data.reduce([]) do |memo, (pn, raw, username)|
54+
posts_data.reduce([]) do |memo, (pn, raw, username, last_version_at)|
5555
raw_text = raw
5656

5757
if pn == 1 && target.topic_embed&.embed_content_cache.present?
5858
raw_text = target.topic_embed&.embed_content_cache
5959
end
6060

61-
memo << { poster: username, id: pn, text: raw_text }
61+
memo << { poster: username, id: pn, text: raw_text, last_version_at: last_version_at }
6262
end
6363
end
6464

lib/summarization/strategies/topic_summary.rb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,17 @@ def targets_data
1818
:post_number,
1919
:raw,
2020
:username,
21+
:last_version_at,
2122
)
2223

23-
posts_data.reduce([]) do |memo, (pn, raw, username)|
24+
posts_data.reduce([]) do |memo, (pn, raw, username, last_version_at)|
2425
raw_text = raw
2526

2627
if pn == 1 && target.topic_embed&.embed_content_cache.present?
2728
raw_text = target.topic_embed&.embed_content_cache
2829
end
2930

30-
memo << { poster: username, id: pn, text: raw_text }
31+
memo << { poster: username, id: pn, text: raw_text, last_version_at: last_version_at }
3132
end
3233
end
3334

spec/jobs/scheduled/summaries_backfill_spec.rb

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
# frozen_string_literal: true
22

33
RSpec.describe Jobs::SummariesBackfill do
4-
fab!(:topic) { Fabricate(:topic, word_count: 200, highest_post_number: 2) }
4+
fab!(:topic) do
5+
Fabricate(:topic, word_count: 200, highest_post_number: 2, last_posted_at: 2.hours.ago)
6+
end
57
let(:limit) { 24 } # guarantee two summaries per batch
68
let(:intervals) { 12 } # budget is split into intervals. Job runs every five minutes.
79

@@ -73,7 +75,7 @@
7375

7476
it "respects max age setting" do
7577
SiteSetting.ai_summary_backfill_topic_max_age_days = 1
76-
topic.update!(created_at: 2.days.ago)
78+
topic.update!(last_posted_at: 2.days.ago)
7779

7880
expect(subject.backfill_candidates(type)).to be_empty
7981
end
@@ -112,14 +114,14 @@
112114
end
113115

114116
it "updates the highest_target_number if the summary turned to be up to date" do
117+
og_highest_post_number = topic.highest_post_number
115118
existing_summary =
116119
Fabricate(
117120
:ai_summary,
118121
target: topic,
119122
updated_at: 3.hours.ago,
120-
highest_target_number: topic.highest_post_number,
123+
highest_target_number: og_highest_post_number,
121124
)
122-
og_highest_post_number = topic.highest_post_number
123125
topic.update!(highest_post_number: og_highest_post_number + 1)
124126

125127
# No prepared responses here. We don't perform a completion call.

spec/lib/modules/summarization/fold_content_spec.rb

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@
33
RSpec.describe DiscourseAi::Summarization::FoldContent do
44
subject(:summarizer) { DiscourseAi::Summarization.topic_summary(topic) }
55

6-
describe "#summarize" do
7-
let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }
6+
let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }
87

9-
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
10-
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }
8+
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
9+
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }
1110

12-
before do
13-
SiteSetting.ai_summarization_enabled = true
11+
before { SiteSetting.ai_summarization_enabled = true }
1412

13+
describe "#summarize" do
14+
before do
1515
# Make sure each content fits in a single chunk.
1616
# 700 is the number of tokens reserved for the prompt.
1717
model_tokens =
@@ -52,4 +52,38 @@
5252
end
5353
end
5454
end
55+
56+
describe "#existing_summary" do
57+
context "when a summary already exists" do
58+
fab!(:ai_summary) do
59+
Fabricate(
60+
:ai_summary,
61+
target: topic,
62+
highest_target_number: topic.highest_post_number,
63+
original_content_sha: AiSummary.build_sha("1"),
64+
)
65+
end
66+
67+
it "doesn't mark it as outdated" do
68+
expect(summarizer.existing_summary.outdated).to eq(false)
69+
end
70+
71+
context "when it's outdated because there are new targets" do
72+
before { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") }
73+
74+
it "marks it as outdated" do
75+
expect(summarizer.existing_summary.outdated).to eq(true)
76+
end
77+
end
78+
79+
context "when it's outdated because existing content changes" do
80+
it "marks it as outdated" do
81+
ai_summary.update!(updated_at: 20.minutes.ago)
82+
post_1.update!(last_version_at: 5.minutes.ago)
83+
84+
expect(summarizer.existing_summary.outdated).to eq(true)
85+
end
86+
end
87+
end
88+
end
5589
end

0 commit comments

Comments
 (0)