From 184a180691171543c90735f683f26139d7b029f9 Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Wed, 23 Oct 2024 18:45:35 -0300 Subject: [PATCH 1/3] FEATURE: Fast-track gist regeneration when a hot topic gets a new post --- app/jobs/regular/hot_topics_gist_batch.rb | 2 +- app/jobs/regular/update_hot_topic_gist.rb | 26 +++++++ lib/summarization/entry_point.rb | 4 ++ .../regular/update_hot_topic_gist_spec.rb | 70 +++++++++++++++++++ 4 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 app/jobs/regular/update_hot_topic_gist.rb create mode 100644 spec/jobs/regular/update_hot_topic_gist_spec.rb diff --git a/app/jobs/regular/hot_topics_gist_batch.rb b/app/jobs/regular/hot_topics_gist_batch.rb index b5ff3a7e1..e71fc0221 100644 --- a/app/jobs/regular/hot_topics_gist_batch.rb +++ b/app/jobs/regular/hot_topics_gist_batch.rb @@ -2,7 +2,7 @@ module ::Jobs class HotTopicsGistBatch < ::Jobs::Base - def execute(args) + def execute(_args) return if !SiteSetting.discourse_ai_enabled return if !SiteSetting.ai_summarization_enabled return if SiteSetting.ai_summarize_max_hot_topics_gists_per_batch.zero? diff --git a/app/jobs/regular/update_hot_topic_gist.rb b/app/jobs/regular/update_hot_topic_gist.rb new file mode 100644 index 000000000..266068c12 --- /dev/null +++ b/app/jobs/regular/update_hot_topic_gist.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +module ::Jobs + class UpdateHotTopicGist < ::Jobs::Base + sidekiq_options retry: false + + def execute(args) + return if !SiteSetting.discourse_ai_enabled + return if !SiteSetting.ai_summarization_enabled + return if SiteSetting.ai_summarize_max_hot_topics_gists_per_batch.zero? + + topic = Topic.find_by(id: args[:topic_id]) + return if topic.blank? + + return if !TopicHotScore.where(topic: topic).exists? + + summarizer = DiscourseAi::Summarization.topic_gist(topic) + gist = summarizer.existing_summary + return if gist.blank? + return if !gist.outdated + + summarizer.delete_cached_summaries! 
+ summarizer.summarize(Discourse.system_user) + end + end +end diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index 07fc3ef41..ebcf4adc4 100644 --- a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -47,6 +47,10 @@ def inject_into(plugin) # To make sure hot topic gists are inmediately up to date, we rely on this event # instead of using a scheduled job. plugin.on(:topic_hot_scores_updated) { Jobs.enqueue(:hot_topics_gist_batch) } + + plugin.on(:post_created) do |post| + Jobs.enqueue(:update_hot_topic_gist, topic_id: post&.topic_id) + end end end end diff --git a/spec/jobs/regular/update_hot_topic_gist_spec.rb b/spec/jobs/regular/update_hot_topic_gist_spec.rb new file mode 100644 index 000000000..b999df67d --- /dev/null +++ b/spec/jobs/regular/update_hot_topic_gist_spec.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +RSpec.describe Jobs::UpdateHotTopicGist do + describe "#execute" do + fab!(:topic_1) { Fabricate(:topic) } + fab!(:post_1) { Fabricate(:post, topic: topic_1, post_number: 1) } + fab!(:post_2) { Fabricate(:post, topic: topic_1, post_number: 2) } + + before do + assign_fake_provider_to(:ai_summarization_model) + SiteSetting.ai_summarization_enabled = true + SiteSetting.ai_summarize_max_hot_topics_gists_per_batch = 100 + end + + context "when the hot topic has a gist" do + before { TopicHotScore.create!(topic_id: topic_1.id, score: 0.1) } + fab!(:ai_gist) do + Fabricate(:topic_ai_gist, target: topic_1, original_content_sha: AiSummary.build_sha("12")) + end + let(:updated_gist) { "They updated me :(" } + + context "when it's up to date" do + it "does nothing" do + DiscourseAi::Completions::Llm.with_prepared_responses([updated_gist]) do + subject.execute(topic_id: topic_1.id) + end + + gist = AiSummary.gist.find_by(target: topic_1) + expect(AiSummary.gist.where(target: topic_1).count).to eq(1) + expect(gist.summarized_text).not_to eq(updated_gist) + end + end + + context "when it's outdated" 
do + it "regenerates the gist using the latest data" do + Fabricate(:post, topic: topic_1, post_number: 3) + + DiscourseAi::Completions::Llm.with_prepared_responses([updated_gist]) do + subject.execute(topic_id: topic_1.id) + end + + gist = AiSummary.gist.find_by(target: topic_1) + expect(AiSummary.gist.where(target: topic_1).count).to eq(1) + expect(gist.summarized_text).to eq(updated_gist) + expect(gist.original_content_sha).to eq(AiSummary.build_sha("123")) + end + end + end + + context "when the topic doesn't have a hot topic score" do + it "does nothing" do + subject.execute(topic_id: topic_1.id) + + gist = AiSummary.gist.find_by(target: topic_1) + expect(gist).to be_nil + end + end + + context "when the topic has a hot topic score but no gist" do + before { TopicHotScore.create!(topic_id: topic_1.id, score: 0.1) } + + it "does nothing" do + subject.execute(topic_id: topic_1.id) + + gist = AiSummary.gist.find_by(target: topic_1) + expect(gist).to be_nil + end + end + end +end From 1807ff5d8cf32f1e701ba252e7f983f7afae49a9 Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Fri, 25 Oct 2024 12:31:06 -0300 Subject: [PATCH 2/3] DEV: Introduce an upsert-like summarize --- app/jobs/regular/hot_topics_gist_batch.rb | 6 +----- app/jobs/regular/update_hot_topic_gist.rb | 3 +-- lib/summarization/fold_content.rb | 5 +++++ 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/jobs/regular/hot_topics_gist_batch.rb b/app/jobs/regular/hot_topics_gist_batch.rb index e71fc0221..aef230a75 100644 --- a/app/jobs/regular/hot_topics_gist_batch.rb +++ b/app/jobs/regular/hot_topics_gist_batch.rb @@ -15,11 +15,7 @@ def execute(_args) summarizer = DiscourseAi::Summarization.topic_gist(topic) gist = summarizer.existing_summary - if gist.blank? || gist.outdated - summarizer.delete_cached_summaries! - - summarizer.summarize(Discourse.system_user) - end + summarizer.force_summarize(Discourse.system_user) if gist.blank? 
|| gist.outdated end end end diff --git a/app/jobs/regular/update_hot_topic_gist.rb b/app/jobs/regular/update_hot_topic_gist.rb index 266068c12..f4a2d3b3e 100644 --- a/app/jobs/regular/update_hot_topic_gist.rb +++ b/app/jobs/regular/update_hot_topic_gist.rb @@ -19,8 +19,7 @@ def execute(args) return if gist.blank? return if !gist.outdated - summarizer.delete_cached_summaries! - summarizer.summarize(Discourse.system_user) + summarizer.force_summarize(Discourse.system_user) end end end diff --git a/lib/summarization/fold_content.rb b/lib/summarization/fold_content.rb index a443e04fd..da3880d80 100644 --- a/lib/summarization/fold_content.rb +++ b/lib/summarization/fold_content.rb @@ -66,6 +66,11 @@ def delete_cached_summaries! AiSummary.where(target: strategy.target, summary_type: strategy.type).destroy_all end + def force_summarize(user, &on_partial_blk) + delete_cached_summaries! + summarize(user, &on_partial_blk) + end + private attr_reader :persist_summaries From b637cc3ec5ff96c79768b0429032897c3da65e32 Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Fri, 25 Oct 2024 12:31:39 -0300 Subject: [PATCH 3/3] FIX: Only enqueue fast-track gist for hot hot hot topics --- lib/summarization/entry_point.rb | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index ebcf4adc4..f3d009398 100644 --- a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -48,8 +48,17 @@ def inject_into(plugin) # instead of using a scheduled job. plugin.on(:topic_hot_scores_updated) { Jobs.enqueue(:hot_topics_gist_batch) } + # As this event can be triggered quite often, let's be overly cautious enqueueing + # jobs if the feature is disabled. 
plugin.on(:post_created) do |post| - Jobs.enqueue(:update_hot_topic_gist, topic_id: post&.topic_id) + if SiteSetting.discourse_ai_enabled && SiteSetting.ai_summarization_enabled && + SiteSetting.ai_summarize_max_hot_topics_gists_per_batch > 0 && post&.topic + hot_score = TopicHotScore.find_by(topic: post.topic) + + if hot_score.present? && hot_score.updated_at > 1.day.ago + Jobs.enqueue(:update_hot_topic_gist, topic_id: post.topic_id) + end + end end end end