Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/jobs/scheduled/summaries_backfill.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def backfill_candidates(summary_type)
ais.target_type = 'Topic' AND
ais.summary_type = '#{summary_type}'
SQL
.where("topics.created_at > current_timestamp - INTERVAL '#{max_age_days.to_i} DAY'")
.where("topics.last_posted_at > current_timestamp - INTERVAL '#{max_age_days.to_i} DAY'")
.where(
<<~SQL, # (1..1) gets stored as (1..2).
ais.id IS NULL OR (
Expand Down
5 changes: 3 additions & 2 deletions lib/summarization/fold_content.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ def existing_summary
if summary
@existing_summary = summary

if existing_summary.original_content_sha != latest_sha
@existing_summary.mark_as_outdated
if summary.original_content_sha != latest_sha ||
content_to_summarize.any? { |cts| cts[:last_version_at] > summary.updated_at }
summary.mark_as_outdated
end
end
end
Expand Down
4 changes: 2 additions & 2 deletions lib/summarization/strategies/chat_messages.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ def targets_data
.where("chat_messages.created_at > ?", since.hours.ago)
.includes(:user)
.order(created_at: :asc)
.pluck(:id, :username_lower, :message)
.map { { id: _1, poster: _2, text: _3 } }
.pluck(:id, :username_lower, :message, :updated_at)
.map { { id: _1, poster: _2, text: _3, last_version_at: _4 } }
end

def summary_extension_prompt(summary, contents)
Expand Down
6 changes: 3 additions & 3 deletions lib/summarization/strategies/hot_topic_gists.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,16 @@ def targets_data
.joins(:user)
.where("post_number IN (?)", recent_hot_posts << op_post_number)
.order(:post_number)
.pluck(:post_number, :raw, :username)
.pluck(:post_number, :raw, :username, :last_version_at)

posts_data.reduce([]) do |memo, (pn, raw, username)|
posts_data.reduce([]) do |memo, (pn, raw, username, last_version_at)|
raw_text = raw

if pn == 1 && target.topic_embed&.embed_content_cache.present?
raw_text = target.topic_embed&.embed_content_cache
end

memo << { poster: username, id: pn, text: raw_text }
memo << { poster: username, id: pn, text: raw_text, last_version_at: last_version_at }
end
end

Expand Down
5 changes: 3 additions & 2 deletions lib/summarization/strategies/topic_summary.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,17 @@ def targets_data
:post_number,
:raw,
:username,
:last_version_at,
)

posts_data.reduce([]) do |memo, (pn, raw, username)|
posts_data.reduce([]) do |memo, (pn, raw, username, last_version_at)|
raw_text = raw

if pn == 1 && target.topic_embed&.embed_content_cache.present?
raw_text = target.topic_embed&.embed_content_cache
end

memo << { poster: username, id: pn, text: raw_text }
memo << { poster: username, id: pn, text: raw_text, last_version_at: last_version_at }
end
end

Expand Down
10 changes: 6 additions & 4 deletions spec/jobs/scheduled/summaries_backfill_spec.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# frozen_string_literal: true

RSpec.describe Jobs::SummariesBackfill do
fab!(:topic) { Fabricate(:topic, word_count: 200, highest_post_number: 2) }
fab!(:topic) do
Fabricate(:topic, word_count: 200, highest_post_number: 2, last_posted_at: 2.hours.ago)
end
let(:limit) { 24 } # guarantee two summaries per batch
let(:intervals) { 12 } # budget is split into intervals. Job runs every five minutes.

Expand Down Expand Up @@ -73,7 +75,7 @@

it "respects max age setting" do
SiteSetting.ai_summary_backfill_topic_max_age_days = 1
topic.update!(created_at: 2.days.ago)
topic.update!(last_posted_at: 2.days.ago)

expect(subject.backfill_candidates(type)).to be_empty
end
Expand Down Expand Up @@ -112,14 +114,14 @@
end

it "updates the highest_target_number if the summary turned to be up to date" do
og_highest_post_number = topic.highest_post_number
existing_summary =
Fabricate(
:ai_summary,
target: topic,
updated_at: 3.hours.ago,
highest_target_number: topic.highest_post_number,
highest_target_number: og_highest_post_number,
)
og_highest_post_number = topic.highest_post_number
topic.update!(highest_post_number: og_highest_post_number + 1)

# No prepared responses here. We don't perform a completion call.
Expand Down
46 changes: 40 additions & 6 deletions spec/lib/modules/summarization/fold_content_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
RSpec.describe DiscourseAi::Summarization::FoldContent do
subject(:summarizer) { DiscourseAi::Summarization.topic_summary(topic) }

describe "#summarize" do
let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }
let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }

fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }

before do
SiteSetting.ai_summarization_enabled = true
before { SiteSetting.ai_summarization_enabled = true }

describe "#summarize" do
before do
# Make sure each content fits in a single chunk.
# 700 is the number of tokens reserved for the prompt.
model_tokens =
Expand Down Expand Up @@ -52,4 +52,38 @@
end
end
end

# Covers how #existing_summary flags a previously stored summary as outdated.
describe "#existing_summary" do
context "when a summary already exists" do
# Stored summary whose highest_target_number equals the topic's current
# highest_post_number. The SHA is built from "1" — presumably the content
# identifier of the single existing post; confirm against AiSummary.build_sha.
fab!(:ai_summary) do
Fabricate(
:ai_summary,
target: topic,
highest_target_number: topic.highest_post_number,
original_content_sha: AiSummary.build_sha("1"),
)
end

# Baseline: nothing is changed after the summary is fabricated.
it "doesn't mark it as outdated" do
expect(summarizer.existing_summary.outdated).to eq(false)
end

context "when it's outdated because there are new targets" do
# Adds a second post to the topic after the summary was fabricated.
before { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") }

it "marks it as outdated" do
expect(summarizer.existing_summary.outdated).to eq(true)
end
end

context "when it's outdated because existing content changes" do
it "marks it as outdated" do
# No new post is added here; the summary's updated_at (20 minutes ago) is
# simply older than the post's last_version_at (5 minutes ago).
ai_summary.update!(updated_at: 20.minutes.ago)
post_1.update!(last_version_at: 5.minutes.ago)

expect(summarizer.existing_summary.outdated).to eq(true)
end
end
end
end
end