Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit fbc74c7

Browse files
authored
FEATURE: Extend summary backfill to also generate gists (#896)
Updates default batch size to 0 and max to 10000
1 parent c421f71 commit fbc74c7

File tree

3 files changed

+52
-50
lines changed

3 files changed

+52
-50
lines changed

app/jobs/scheduled/summaries_backfill.rb

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,34 +10,48 @@ def execute(_args)
1010
return if !SiteSetting.ai_summarization_enabled
1111
return if SiteSetting.ai_summary_backfill_maximum_topics_per_hour.zero?
1212

13-
# Split budget in 12 intervals, but make sure is at least one.
14-
limit_per_job = [SiteSetting.ai_summary_backfill_maximum_topics_per_hour, 12].max / 12
15-
budget = [current_budget, limit_per_job].min
13+
system_user = Discourse.system_user
1614

17-
backfill_candidates
18-
.limit(budget)
15+
complete_t = AiSummary.summary_types[:complete]
16+
backfill_candidates(complete_t)
17+
.limit(current_budget(complete_t))
1918
.each do |topic|
20-
DiscourseAi::Summarization.topic_summary(topic).force_summarize(Discourse.system_user)
19+
DiscourseAi::Summarization.topic_summary(topic).force_summarize(system_user)
2120
end
21+
22+
gist_t = AiSummary.summary_types[:gist]
23+
backfill_candidates(gist_t)
24+
.limit(current_budget(gist_t))
25+
.each { |topic| DiscourseAi::Summarization.topic_gist(topic).force_summarize(system_user) }
2226
end
2327

24-
def backfill_candidates
28+
def backfill_candidates(summary_type)
2529
Topic
2630
.where("topics.word_count >= ?", SiteSetting.ai_summary_backfill_minimum_word_count)
27-
.joins(
28-
"LEFT OUTER JOIN ai_summaries ais ON topics.id = ais.target_id AND ais.target_type = 'Topic'",
29-
)
31+
.joins(<<~SQL)
32+
LEFT OUTER JOIN ai_summaries ais ON
33+
topics.id = ais.target_id AND
34+
ais.target_type = 'Topic' AND
35+
ais.summary_type = '#{summary_type}'
36+
SQL
3037
.where(
3138
"ais.id IS NULL OR UPPER(ais.content_range) < topics.highest_post_number + 1",
3239
) # (1..1) gets stored as (1..2).
3340
.order("ais.created_at DESC NULLS FIRST, topics.last_posted_at DESC")
3441
end
3542

36-
def current_budget
43+
def current_budget(type)
44+
# Split the budget into 12 intervals, but make sure the per-job limit is at least one.
3745
base_budget = SiteSetting.ai_summary_backfill_maximum_topics_per_hour
38-
used_budget = AiSummary.complete.system.where("created_at > ?", 1.hour.ago).count
46+
limit_per_job = [base_budget, 12].max / 12
47+
48+
used_budget =
49+
AiSummary.system.where("created_at > ?", 1.hour.ago).where(summary_type: type).count
50+
51+
current_budget = [(base_budget - used_budget), limit_per_job].min
52+
return 0 if current_budget < 0
3953

40-
base_budget - used_budget
54+
current_budget
4155
end
4256
end
4357
end

config/settings.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,9 +376,9 @@ discourse_ai:
376376
type: list
377377
list_type: compact
378378
ai_summary_backfill_maximum_topics_per_hour:
379-
default: 10
379+
default: 0
380380
min: 0
381-
max: 1000
381+
max: 10000
382382
ai_summary_backfill_minimum_word_count:
383383
default: 200
384384
hidden: true

spec/jobs/scheduled/summaries_backfill_spec.rb

Lines changed: 23 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
RSpec.describe Jobs::SummariesBackfill do
44
fab!(:topic) { Fabricate(:topic, word_count: 200, highest_post_number: 2) }
55
let(:limit) { 24 } # guarantee two summaries per batch
6+
let(:intervals) { 12 } # budget is split into intervals. Job runs every five minutes.
67

78
before do
89
assign_fake_provider_to(:ai_summarization_model)
@@ -11,65 +12,47 @@
1112
end
1213

1314
describe "#current_budget" do
15+
let(:type) { AiSummary.summary_types[:complete] }
16+
1417
context "when no summary has been backfilled yet" do
1518
it "returns the full budget" do
16-
expect(subject.current_budget).to eq(limit)
19+
expect(subject.current_budget(type)).to eq(limit / intervals)
1720
end
1821

1922
it "ignores summaries generated by users" do
2023
Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:human])
2124

22-
expect(subject.current_budget).to eq(limit)
25+
expect(subject.current_budget(type)).to eq(limit / intervals)
2326
end
2427

25-
it "only accounts for complete type summaries" do
28+
it "only accounts for summaries of the given type" do
2629
Fabricate(:topic_ai_gist, target: topic, origin: AiSummary.origins[:human])
2730

28-
expect(subject.current_budget).to eq(limit)
29-
end
30-
end
31-
32-
context "when we already backfilled stuff" do
33-
fab!(:backfilled_summary) do
34-
Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:system])
35-
end
36-
37-
context "if it was within the budget window" do
38-
it "reduces our budget" do
39-
expect(subject.current_budget).to eq(limit - 1)
40-
end
41-
end
42-
43-
context "if it wasn't within the budget window" do
44-
before { freeze_time(2.hours.from_now) }
45-
46-
it "returns the full budget" do
47-
freeze_time(2.hours.from_now)
48-
49-
expect(subject.current_budget).to eq(limit)
50-
end
31+
expect(subject.current_budget(type)).to eq(limit / intervals)
5132
end
5233
end
5334
end
5435

5536
describe "#backfill_candidates" do
37+
let(:type) { AiSummary.summary_types[:complete] }
38+
5639
it "only selects posts with enough words" do
5740
topic.update!(word_count: 100)
5841

59-
expect(subject.backfill_candidates).to be_empty
42+
expect(subject.backfill_candidates(type)).to be_empty
6043
end
6144

6245
it "ignores up to date summaries" do
6346
Fabricate(:ai_summary, target: topic, content_range: (1..2))
6447

65-
expect(subject.backfill_candidates).to be_empty
48+
expect(subject.backfill_candidates(type)).to be_empty
6649
end
6750

6851
it "orders candidates by topic#last_posted_at" do
6952
topic.update!(last_posted_at: 1.minute.ago)
7053
topic_2 = Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago)
7154

72-
expect(subject.backfill_candidates.map(&:id)).to contain_exactly(topic.id, topic_2.id)
55+
expect(subject.backfill_candidates(type).map(&:id)).to contain_exactly(topic.id, topic_2.id)
7356
end
7457

7558
it "prioritizes topics without summaries" do
@@ -78,7 +61,7 @@
7861
topic.update!(last_posted_at: 1.minute.ago)
7962
Fabricate(:ai_summary, target: topic, content_range: (1..1))
8063

81-
expect(subject.backfill_candidates.map(&:id)).to contain_exactly(topic_2.id, topic.id)
64+
expect(subject.backfill_candidates(type).map(&:id)).to contain_exactly(topic_2.id, topic.id)
8265
end
8366
end
8467

@@ -88,16 +71,21 @@
8871
Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
8972
topic.update!(last_posted_at: 1.minute.ago)
9073
Fabricate(:ai_summary, target: topic, created_at: 3.hours.ago, content_range: (1..1))
74+
Fabricate(:topic_ai_gist, target: topic, created_at: 3.hours.ago, content_range: (1..1))
9175

9276
summary_1 = "Summary of topic_2"
77+
gist_1 = "Gist of topic_2"
9378
summary_2 = "Summary of topic"
79+
gist_2 = "Gist of topic"
9480

95-
DiscourseAi::Completions::Llm.with_prepared_responses([summary_1, summary_2]) do
96-
subject.execute({})
97-
end
81+
DiscourseAi::Completions::Llm.with_prepared_responses(
82+
[summary_1, summary_2, gist_1, gist_2],
83+
) { subject.execute({}) }
9884

99-
expect(AiSummary.find_by(target: topic_2).summarized_text).to eq(summary_1)
100-
expect(AiSummary.find_by(target: topic).summarized_text).to eq(summary_2)
85+
expect(AiSummary.complete.find_by(target: topic_2).summarized_text).to eq(summary_1)
86+
expect(AiSummary.gist.find_by(target: topic_2).summarized_text).to eq(gist_1)
87+
expect(AiSummary.complete.find_by(target: topic).summarized_text).to eq(summary_2)
88+
expect(AiSummary.gist.find_by(target: topic).summarized_text).to eq(gist_2)
10189
end
10290
end
10391
end

0 commit comments

Comments
 (0)