Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 9505a89

Browse files
authored
FEATURE: Automatically backfill regular summaries. (#892)
This change introduces a job to summarize topics and cache the results automatically. We provide a setting to control how many topics we'll backfill per hour and what the topic's minimum word count is to qualify. We'll prioritize topics without summary over outdated ones.
1 parent 98022d7 commit 9505a89

File tree

10 files changed

+191
-9
lines changed

10 files changed

+191
-9
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# frozen_string_literal: true
2+
3+
module ::Jobs
4+
class SummariesBackfill < ::Jobs::Scheduled
5+
every 5.minutes
6+
cluster_concurrency 1
7+
8+
# Runs one backfill pass: force-summarizes a limited number of candidate
# topics, passing Discourse.system_user as the requesting user.
#
# Does nothing when the plugin or summarization is disabled, when the hourly
# cap is zero, or when there is no budget left for this run.
#
# @param _args [Hash] scheduler arguments (unused)
def execute(_args)
  return if !SiteSetting.discourse_ai_enabled
  return if !SiteSetting.ai_summarization_enabled

  hourly_cap = SiteSetting.ai_summary_backfill_maximum_topics_per_hour
  return if hourly_cap.zero?

  # Split the budget in 12 intervals, but make sure it is at least one.
  limit_per_job = [hourly_cap, 12].max / 12
  budget = [current_budget, limit_per_job].min

  # The remaining budget drops to zero or below once the summaries stored in
  # the last hour reach or exceed the cap (e.g. the cap was just lowered).
  # A negative LIMIT is invalid SQL and a zero LIMIT is a wasted query.
  return if budget <= 0

  backfill_candidates
    .limit(budget)
    .each do |topic|
      DiscourseAi::Summarization.topic_summary(topic).force_summarize(Discourse.system_user)
    end
end
23+
24+
# Builds the query for topics that still need a complete summary.
#
# A topic qualifies when it has at least the configured minimum word count
# and either has no complete summary or its summary's content range ends
# before the topic's highest post number. Topics without a summary come
# first; remaining ties are ordered by topics.last_posted_at, newest first.
#
# @return a chainable Topic query (the caller applies `.limit` to it)
def backfill_candidates
  # Join complete summaries only. Gists live in the same table: matching them
  # would hide topics that only have an up-to-date gist, and would re-list
  # topics whose complete summary is current but whose gist is outdated.
  complete_type = Integer(AiSummary.summary_types[:complete])

  Topic
    .where("topics.word_count >= ?", SiteSetting.ai_summary_backfill_minimum_word_count)
    .joins(
      "LEFT OUTER JOIN ai_summaries ais ON topics.id = ais.target_id AND ais.target_type = 'Topic' " \
        "AND ais.summary_type = #{complete_type}",
    )
    .where(
      "ais.id IS NULL OR UPPER(ais.content_range) < topics.highest_post_number + 1",
    ) # (1..1) gets stored as (1..2).
    .order("ais.created_at DESC NULLS FIRST, topics.last_posted_at DESC")
end
35+
36+
# Remaining hourly budget: the configured maximum minus the complete,
# system-origin summaries created during the last hour.
#
# @return [Integer] remaining budget, never below zero
def current_budget
  base_budget = SiteSetting.ai_summary_backfill_maximum_topics_per_hour
  used_budget = AiSummary.complete.system.where("created_at > ?", 1.hour.ago).count

  # Usage can exceed the cap (e.g. the setting was lowered after summaries
  # were stored), so clamp instead of reporting a negative budget.
  [base_budget - used_budget, 0].max
end
42+
end
43+
end

app/models/ai_summary.rb

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,19 @@ class AiSummary < ActiveRecord::Base
44
belongs_to :target, polymorphic: true
55

66
enum :summary_type, { complete: 0, gist: 1 }
7+
enum :origin, { human: 0, system: 1 }
8+
9+
def self.store!(strategy, llm_model, summary, og_content, human:)
10+
content_ids = og_content.map { |c| c[:id] }
711

8-
def self.store!(target, summary_type, model, summary, content_ids)
912
AiSummary.create!(
10-
target: target,
11-
algorithm: model,
13+
target: strategy.target,
14+
algorithm: llm_model.name,
1215
content_range: (content_ids.first..content_ids.last),
1316
summarized_text: summary,
1417
original_content_sha: build_sha(content_ids.join),
15-
summary_type: summary_type,
18+
summary_type: strategy.type,
19+
origin: !!human ? origins[:human] : origins[:system],
1620
)
1721
end
1822

@@ -43,6 +47,7 @@ def outdated
4347
# created_at :datetime not null
4448
# updated_at :datetime not null
4549
# summary_type :integer default("complete"), not null
50+
# origin :integer
4651
#
4752
# Indexes
4853
#

config/locales/server.en.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,14 @@ en:
7979
ai_embeddings_semantic_related_include_closed_topics: "Include closed topics in semantic search results"
8080
ai_embeddings_semantic_search_hyde_model: "Model used to expand keywords to get better results during a semantic search"
8181
ai_embeddings_per_post_enabled: Generate embeddings for each post
82+
8283
ai_summarization_enabled: "Enable the topic summarization module."
8384
ai_summarization_model: "Model to use for summarization."
8485
ai_custom_summarization_allowed_groups: "Groups allowed to create new summaries."
8586
ai_pm_summarization_allowed_groups: "Groups allowed to create and view summaries in PMs."
8687
ai_summarize_max_hot_topics_gists_per_batch: "After updating topics in the hot list, we'll generate brief summaries of the first N ones. (Disabled when 0)"
8788
ai_hot_topic_gists_allowed_groups: "Groups allowed to see gists in the hot topics list."
89+
ai_summary_backfill_maximum_topics_per_hour: "Number of topic summaries to backfill per hour."
8890

8991
ai_bot_enabled: "Enable the AI Bot module."
9092
ai_bot_enable_chat_warning: "Display a warning when PM chat is initiated. Can be overridden by editing the translation string: discourse_ai.ai_bot.pm_warning"

config/settings.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,13 @@ discourse_ai:
375375
hidden: true
376376
type: list
377377
list_type: compact
378+
ai_summary_backfill_maximum_topics_per_hour:
379+
default: 10
380+
min: 0
381+
max: 1000
382+
ai_summary_backfill_minimum_word_count:
383+
default: 200
384+
hidden: true
378385

379386
ai_bot_enabled:
380387
default: false
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# frozen_string_literal: true
2+
# Adds an integer `origin` column to the `ai_summaries` table.
class TrackAiSummaryOrigin < ActiveRecord::Migration[7.1]
  def change
    add_column(:ai_summaries, :origin, :integer)
  end
end
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# frozen_string_literal: true
2+
# Fills in `origin` for rows that do not have one yet: rows with
# summary_type 0 get origin 0, every other row gets origin 1.
# Rows whose origin is already set are left untouched.
class SetOriginForExistingAiSummaries < ActiveRecord::Migration[7.1]
  def up
    DB.exec(<<~SQL)
      UPDATE ai_summaries
      SET origin = CASE WHEN summary_type = 0 THEN 0 ELSE 1 END
      WHERE origin IS NULL
    SQL
  end

  # The previous NULL values cannot be told apart once backfilled.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end

lib/summarization/fold_content.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@ def summarize(user, &on_partial_blk)
3535

3636
if persist_summaries
3737
AiSummary.store!(
38-
strategy.target,
39-
strategy.type,
40-
llm_model.name,
38+
strategy,
39+
llm_model,
4140
clean_summary,
42-
truncated_content.map { |c| c[:id] },
41+
truncated_content,
42+
human: user&.human?,
4343
)
4444
else
4545
AiSummary.new(summarized_text: clean_summary)

spec/fabricators/ai_summary_fabricator.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
algorithm "test"
77
target { Fabricate(:topic) }
88
summary_type AiSummary.summary_types[:complete]
9+
origin AiSummary.origins[:human]
910
end
1011

1112
Fabricator(:topic_ai_gist, from: :ai_summary) do
1213
summarized_text "gist"
1314
summary_type AiSummary.summary_types[:gist]
15+
origin AiSummary.origins[:system]
1416
end
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# frozen_string_literal: true
2+
3+
# Specs for the scheduled job that backfills topic summaries: budget
# accounting, candidate selection/ordering, and one end-to-end batch.
RSpec.describe Jobs::SummariesBackfill do
  fab!(:topic) { Fabricate(:topic, word_count: 200, highest_post_number: 2) }
  let(:limit) { 24 } # guarantee two summaries per batch

  before do
    assign_fake_provider_to(:ai_summarization_model)
    SiteSetting.ai_summarization_enabled = true
    SiteSetting.ai_summary_backfill_maximum_topics_per_hour = limit
  end

  describe "#current_budget" do
    context "when no summary has been backfilled yet" do
      it "returns the full budget" do
        expect(subject.current_budget).to eq(limit)
      end

      it "ignores summaries generated by users" do
        Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:human])

        expect(subject.current_budget).to eq(limit)
      end

      it "only accounts for complete type summaries" do
        # System origin on purpose: a human-origin gist is already excluded by
        # its origin and would never exercise the summary-type filter.
        Fabricate(:topic_ai_gist, target: topic, origin: AiSummary.origins[:system])

        expect(subject.current_budget).to eq(limit)
      end
    end

    context "when we already backfilled stuff" do
      fab!(:backfilled_summary) do
        Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:system])
      end

      context "if it was within the budget window" do
        it "reduces our budget" do
          expect(subject.current_budget).to eq(limit - 1)
        end
      end

      context "if it wasn't within the budget window" do
        before { freeze_time(2.hours.from_now) }

        it "returns the full budget" do
          expect(subject.current_budget).to eq(limit)
        end
      end
    end
  end

  describe "#backfill_candidates" do
    it "only selects posts with enough words" do
      topic.update!(word_count: 100)

      expect(subject.backfill_candidates).to be_empty
    end

    it "ignores up to date summaries" do
      Fabricate(:ai_summary, target: topic, content_range: (1..2))

      expect(subject.backfill_candidates).to be_empty
    end

    it "orders candidates by topic#last_posted_at" do
      topic.update!(last_posted_at: 1.minute.ago)
      topic_2 = Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago)

      # `eq` rather than `contain_exactly`: the order is what is under test.
      expect(subject.backfill_candidates.map(&:id)).to eq([topic.id, topic_2.id])
    end

    it "prioritizes topics without summaries" do
      topic_2 =
        Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
      topic.update!(last_posted_at: 1.minute.ago)
      Fabricate(:ai_summary, target: topic, content_range: (1..1))

      # `eq` rather than `contain_exactly`: the order is what is under test.
      expect(subject.backfill_candidates.map(&:id)).to eq([topic_2.id, topic.id])
    end
  end

  describe "#execute" do
    it "backfills a batch" do
      topic_2 =
        Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
      topic.update!(last_posted_at: 1.minute.ago)
      Fabricate(:ai_summary, target: topic, created_at: 3.hours.ago, content_range: (1..1))

      summary_1 = "Summary of topic_2"
      summary_2 = "Summary of topic"

      DiscourseAi::Completions::Llm.with_prepared_responses([summary_1, summary_2]) do
        subject.execute({})
      end

      expect(AiSummary.find_by(target: topic_2).summarized_text).to eq(summary_1)
      expect(AiSummary.find_by(target: topic).summarized_text).to eq(summary_2)
    end
  end
end

spec/lib/modules/summarization/fold_content_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
let(:single_summary) { "single" }
2727
let(:concatenated_summary) { "this is a concatenated summary" }
2828

29-
let(:user) { User.new }
29+
fab!(:user)
3030

3131
context "when the content to summarize fits in a single call" do
3232
it "does one call to summarize content" do

0 commit comments

Comments
 (0)