From 3f6cbd725ccdde0871096f244c55c003435172ed Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Tue, 15 Oct 2024 16:23:31 -0300 Subject: [PATCH 1/7] Display gists in the hot topics list --- .../ai-topic-gist.gjs | 16 +++++ .../summarization/common/ai-summary.scss | 8 +++ config/locales/server.en.yml | 1 + config/settings.yml | 3 + lib/summarization/entry_point.rb | 20 ++++++ lib/summarization/strategies/topic_gist.rb | 11 +++- lib/topic_extensions.rb | 9 +++ plugin.rb | 5 +- spec/fabricators/ai_summary_fabricator.rb | 14 ++++ .../modules/summarization/entry_point_spec.rb | 66 +++++++++++++++++++ 10 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 assets/javascripts/discourse/connectors/topic-list-main-link-bottom/ai-topic-gist.gjs create mode 100644 lib/topic_extensions.rb create mode 100644 spec/fabricators/ai_summary_fabricator.rb create mode 100644 spec/lib/modules/summarization/entry_point_spec.rb diff --git a/assets/javascripts/discourse/connectors/topic-list-main-link-bottom/ai-topic-gist.gjs b/assets/javascripts/discourse/connectors/topic-list-main-link-bottom/ai-topic-gist.gjs new file mode 100644 index 000000000..75ec73baa --- /dev/null +++ b/assets/javascripts/discourse/connectors/topic-list-main-link-bottom/ai-topic-gist.gjs @@ -0,0 +1,16 @@ +import Component from "@glimmer/component"; +import icon from "discourse-common/helpers/d-icon"; + +export default class AiTopicGist extends Component { + static shouldRender(outletArgs, helper) { + return outletArgs?.topic?.ai_topic_gist && !outletArgs.topic.excerpt; + } + + +} diff --git a/assets/stylesheets/modules/summarization/common/ai-summary.scss b/assets/stylesheets/modules/summarization/common/ai-summary.scss index 3bd138037..ddb20faef 100644 --- a/assets/stylesheets/modules/summarization/common/ai-summary.scss +++ b/assets/stylesheets/modules/summarization/common/ai-summary.scss @@ -215,3 +215,11 @@ opacity: 1; } } + +.ai-topic-gist { + margin-top: .5em; + + &__text { + font-size: 
var(--font-down-2); + } +} \ No newline at end of file diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 3f564bcb4..9ae9e97fd 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -84,6 +84,7 @@ en: ai_summarization_model: "Model to use for summarization." ai_custom_summarization_allowed_groups: "Groups allowed to use create new summaries." ai_pm_summarization_allowed_groups: "Groups allowed to create and view summaries in PMs." + ai_summarize_hot_topics_list: "Display a brief summary for each topic in the hot topics list (if available)." ai_bot_enabled: "Enable the AI Bot module." ai_bot_enable_chat_warning: "Display a warning when PM chat is initiated. Can be overriden by editing the translation string: discourse_ai.ai_bot.pm_warning" diff --git a/config/settings.yml b/config/settings.yml index 2a3f5ad07..968a9da4f 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -376,6 +376,9 @@ discourse_ai: type: group_list list_type: compact default: "3|13" # 3: @staff, 13: @trust_level_3 + ai_summarize_hot_topics_list: + client: true + default: false ai_summarization_strategy: # TODO(roman): Deprecated. 
Remove by Sept 2024 type: enum default: "" diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index 052926dfb..3d70a2f45 100644 --- a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -16,6 +16,26 @@ def inject_into(plugin) plugin.add_to_serializer(:web_hook_topic_view, :summarizable) do scope.can_see_summary?(object.topic, AiSummary.summary_types[:complete]) end + + plugin.register_modifier(:topic_query_create_list_topics) do |topics, options| + if options[:filter] == :hot && SiteSetting.ai_summarization_enabled && + SiteSetting.ai_summarize_hot_topics_list + topics.includes(:ai_summaries).where( + "ai_summaries.id IS NULL OR ai_summaries.summary_type = ?", + AiSummary.summary_types[:gist], + ) + else + topics + end + end + + plugin.add_to_serializer( + :topic_list_item, + :ai_topic_gist, + include_condition: -> do + SiteSetting.ai_summarization_enabled && SiteSetting.ai_summarize_hot_topics_list + end, + ) { object.ai_summaries.to_a.first&.summarized_text } end end end diff --git a/lib/summarization/strategies/topic_gist.rb b/lib/summarization/strategies/topic_gist.rb index f52fbc399..214322593 100644 --- a/lib/summarization/strategies/topic_gist.rb +++ b/lib/summarization/strategies/topic_gist.rb @@ -47,6 +47,12 @@ def concatenation_prompt(texts_to_summarize) prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) You are a summarization bot tasked with creating a single, concise sentence by merging disjointed summaries into a cohesive statement. Your response should strictly be this single, comprehensive sentence, without any additional text or comments. + + - Focus on the central theme or issue being addressed, while maintaining an objective and neutral tone. + - Avoid including extraneous details or subjective opinions. + - Maintain the original language of the text being summarized. + - Try to use no more than 20 words. 
+ - Begin the summary directly with the main topic or issue, using clear and direct language without introductory phrases like "The discussion is about..." TEXT prompt.push(type: :user, content: <<~TEXT.strip) @@ -63,11 +69,14 @@ def concatenation_prompt(texts_to_summarize) def summarize_single_prompt(input, opts) prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) You are an advanced summarization bot. Your task is to analyze a given conversation and generate a single, - concise sentence that clearly conveys the main topic and purpose of the discussion to someone with no prior context. + concise sentence that clearly conveys the main topic and purpose of the discussion to someone with no prior context. - Focus on the central theme or issue being addressed, while maintaining an objective and neutral tone. - Avoid including extraneous details or subjective opinions. - Maintain the original language of the text being summarized. + - Begin the summary directly with the main topic or issue, using clear and direct language without introductory phrases like "The discussion is about...". + - The sentence doesn't have to mention the discussion title. + - Aim to use no more than 20 words. 
TEXT prompt.push(type: :user, content: <<~TEXT.strip) diff --git a/lib/topic_extensions.rb b/lib/topic_extensions.rb new file mode 100644 index 000000000..49539ec2f --- /dev/null +++ b/lib/topic_extensions.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +module DiscourseAi + module TopicExtensions + extend ActiveSupport::Concern + + prepended { has_many :ai_summaries, as: :target } + end +end diff --git a/plugin.rb b/plugin.rb index be0434bab..bb4a320a1 100644 --- a/plugin.rb +++ b/plugin.rb @@ -76,7 +76,10 @@ module ::DiscourseAi require_relative "spec/support/stable_diffusion_stubs" end - reloadable_patch { |plugin| Guardian.prepend DiscourseAi::GuardianExtensions } + reloadable_patch do |plugin| + Guardian.prepend DiscourseAi::GuardianExtensions + Topic.prepend DiscourseAi::TopicExtensions + end register_modifier(:post_should_secure_uploads?) do |_, _, topic| if topic.private_message? && SharedAiConversation.exists?(target: topic) diff --git a/spec/fabricators/ai_summary_fabricator.rb b/spec/fabricators/ai_summary_fabricator.rb new file mode 100644 index 000000000..f88521823 --- /dev/null +++ b/spec/fabricators/ai_summary_fabricator.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +Fabricator(:ai_summary) do + summarized_text "complete summary" + original_content_sha "123" + algorithm "test" + target { Fabricate(:topic) } + summary_type AiSummary.summary_types[:complete] +end + +Fabricator(:topic_ai_gist, from: :ai_summary) do + summarized_text "gist" + summary_type AiSummary.summary_types[:gist] +end diff --git a/spec/lib/modules/summarization/entry_point_spec.rb b/spec/lib/modules/summarization/entry_point_spec.rb new file mode 100644 index 000000000..0ec033172 --- /dev/null +++ b/spec/lib/modules/summarization/entry_point_spec.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +RSpec.describe DiscourseAi::Summarization::EntryPoint do + before do + assign_fake_provider_to(:ai_summarization_model) + SiteSetting.ai_summarization_enabled = true + end + 
+ fab!(:user) + + describe "#inject_into" do + describe "hot topics gist summarization" do + fab!(:topic_ai_gist) + fab!(:regular_summary) { Fabricate(:ai_summary, target: topic_ai_gist.target) } + + before { TopicHotScore.create!(topic_id: topic_ai_gist.target_id, score: 1.0) } + + let(:topic_query) { TopicQuery.new(user) } + + describe "topic_query_create_list_topics modifier" do + context "when hot topic summarization is enabled" do + before { SiteSetting.ai_summarize_hot_topics_list = true } + + it "preloads only gist summaries" do + gist_topic = topic_query.list_hot.topics.find { |t| t.id == topic_ai_gist.target_id } + + expect(gist_topic.ai_summaries.size).to eq(1) + expect(gist_topic.ai_summaries.first).to eq(topic_ai_gist) + end + + it "doesn't filter out hot topics without summaries" do + TopicHotScore.create!(topic_id: Fabricate(:topic).id, score: 1.0) + + expect(topic_query.list_hot.topics.size).to eq(2) + end + end + end + + describe "topic_list_item serializer's ai_summary" do + context "when hot topic summarization is disabled" do + it "doesn't include summaries" do + gist_topic = topic_query.list_hot.topics.find { |t| t.id == topic_ai_gist.target_id } + + serialized = + TopicListItemSerializer.new(gist_topic, scope: Guardian.new, root: false).as_json + + expect(serialized.has_key?(:ai_topic_gist)).to eq(false) + end + end + + context "when hot topics summarization is enabled" do + before { SiteSetting.ai_summarize_hot_topics_list = true } + + it "includes the summary" do + gist_topic = topic_query.list_hot.topics.find { |t| t.id == topic_ai_gist.target_id } + + serialized = + TopicListItemSerializer.new(gist_topic, scope: Guardian.new, root: false).as_json + + expect(serialized[:ai_topic_gist]).to be_present + end + end + end + end + end +end From f1113accda8ece02890b4e6694bb6a2e0c069acb Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Thu, 17 Oct 2024 17:31:01 -0300 Subject: [PATCH 2/7] Adjust hot topics gist strategy and add a job to generate gists 
--- app/jobs/regular/hot_topics_gist_batch.rb | 24 ++++ lib/summarization.rb | 2 +- lib/summarization/entry_point.rb | 4 + .../{topic_gist.rb => hot_topic_gists.rb} | 11 +- spec/jobs/regular/hot_topics_gist_batch.rb | 121 ++++++++++++++++++ ...c_gist_spec.rb => hot_topic_gists_spec.rb} | 21 +-- 6 files changed, 162 insertions(+), 21 deletions(-) create mode 100644 app/jobs/regular/hot_topics_gist_batch.rb rename lib/summarization/strategies/{topic_gist.rb => hot_topic_gists.rb} (92%) create mode 100644 spec/jobs/regular/hot_topics_gist_batch.rb rename spec/lib/modules/summarization/strategies/{topic_gist_spec.rb => hot_topic_gists_spec.rb} (70%) diff --git a/app/jobs/regular/hot_topics_gist_batch.rb b/app/jobs/regular/hot_topics_gist_batch.rb new file mode 100644 index 000000000..021e78859 --- /dev/null +++ b/app/jobs/regular/hot_topics_gist_batch.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module ::Jobs + class HotTopicsGistBatch < ::Jobs::Base + def execute(args) + return if !SiteSetting.discourse_ai_enabled + return if !SiteSetting.ai_summarization_enabled + return if !SiteSetting.ai_summarize_hot_topics_list + + Topic + .joins("JOIN topic_hot_scores on topics.id = topic_hot_scores.topic_id") + .order("topic_hot_scores.score DESC") + .limit(100) + .each do |topic| + summarizer = DiscourseAi::Summarization.topic_gist(topic) + gist = summarizer.existing_summary + + summarizer.delete_cached_summaries! if gist && gist.outdated + + summarizer.summarize(Discourse.system_user) + end + end + end +end diff --git a/lib/summarization.rb b/lib/summarization.rb index e338794f3..fe8794917 100644 --- a/lib/summarization.rb +++ b/lib/summarization.rb @@ -17,7 +17,7 @@ def self.topic_gist(topic) if SiteSetting.ai_summarization_model.present? 
&& SiteSetting.ai_summarization_enabled DiscourseAi::Summarization::FoldContent.new( DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model), - DiscourseAi::Summarization::Strategies::TopicGist.new(topic), + DiscourseAi::Summarization::Strategies::HotTopicGists.new(topic), ) else nil diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index 3d70a2f45..ccdc3a783 100644 --- a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -36,6 +36,10 @@ def inject_into(plugin) SiteSetting.ai_summarization_enabled && SiteSetting.ai_summarize_hot_topics_list end, ) { object.ai_summaries.to_a.first&.summarized_text } + + # To make sure hot topic gists are inmediately up to date, we rely on this event + # instead of using a scheduled job. + plugin.on(:topic_hot_scores_updated) { Jobs.enqueue(:summarize_hot_topics_batch) } end end end diff --git a/lib/summarization/strategies/topic_gist.rb b/lib/summarization/strategies/hot_topic_gists.rb similarity index 92% rename from lib/summarization/strategies/topic_gist.rb rename to lib/summarization/strategies/hot_topic_gists.rb index 214322593..74be80b2b 100644 --- a/lib/summarization/strategies/topic_gist.rb +++ b/lib/summarization/strategies/hot_topic_gists.rb @@ -3,7 +3,7 @@ module DiscourseAi module Summarization module Strategies - class TopicGist < Base + class HotTopicGists < Base def type AiSummary.summary_types[:gist] end @@ -13,20 +13,21 @@ def targets_data op_post_number = 1 - last_twenty_posts = + hot_topics_recent_cutoff = Time.zone.now - SiteSetting.hot_topics_recent_days.days + + recent_hot_posts = Post .where(topic_id: target.id) .where("post_type = ?", Post.types[:regular]) .where("NOT hidden") - .order("post_number DESC") - .limit(20) + .where("created_at >= ?", hot_topics_recent_cutoff) .pluck(:post_number) posts_data = Post .where(topic_id: target.id) .joins(:user) - .where("post_number IN (?)", last_twenty_posts << op_post_number) + .where("post_number 
IN (?)", recent_hot_posts << op_post_number) .order(:post_number) .pluck(:post_number, :raw, :username) diff --git a/spec/jobs/regular/hot_topics_gist_batch.rb b/spec/jobs/regular/hot_topics_gist_batch.rb new file mode 100644 index 000000000..8eab7d8a2 --- /dev/null +++ b/spec/jobs/regular/hot_topics_gist_batch.rb @@ -0,0 +1,121 @@ +# frozen_string_literal: true + +RSpec.describe Jobs::HotTopicsGistBatch do + fab!(:topic_1) { Fabricate(:topic) } + fab!(:post_1) { Fabricate(:post, topic: topic_1, post_number: 1) } + fab!(:post_2) { Fabricate(:post, topic: topic_1, post_number: 2) } + + before do + assign_fake_provider_to(:ai_summarization_model) + SiteSetting.ai_summarization_enabled = true + SiteSetting.ai_summarize_hot_topics_list = true + end + + describe "#execute" do + context "When there is a topic with a hot score" do + before { TopicHotScore.create!(topic_id: topic_1.id, score: 0.1) } + + it "does nothing if the plugin is disabled" do + SiteSetting.discourse_ai_enabled = false + + subject.execute({}) + + gist = AiSummary.gist.find_by(target: topic_1) + expect(gist).to be_nil + end + + it "does nothing if the summarization module is disabled" do + SiteSetting.ai_summarization_enabled = false + + subject.execute({}) + + gist = AiSummary.gist.find_by(target: topic_1) + expect(gist).to be_nil + end + + it "does nothing if hot topics summarization is disabled" do + SiteSetting.ai_summarize_hot_topics_list = false + + subject.execute({}) + + gist = AiSummary.gist.find_by(target: topic_1) + expect(gist).to be_nil + end + + it "creates a gist" do + gist_result = "I'm a gist" + + DiscourseAi::Completions::Llm.with_prepared_responses([gist_result]) { subject.execute({}) } + + gist = AiSummary.gist.find_by(target: topic_1) + expect(gist.summarized_text).to eq("I'm a gist") + end + + context "and we already generated a gist of it" do + fab!(:ai_gist) do + Fabricate( + :topic_ai_gist, + target: topic_1, + original_content_sha: AiSummary.build_sha("12"), + ) + end + + it 
"does nothing if the gist is up to date" do + subject.execute({}) + + gist = AiSummary.gist.find_by(target: topic_1) + expect(gist.summarized_text).to eq(ai_gist.summarized_text) + expect(gist.original_content_sha).to eq(ai_gist.original_content_sha) + end + + it "regenerates it if it's outdated" do + Fabricate(:post, topic: topic_1, post_number: 3) + gist_result = "They updated me" + + DiscourseAi::Completions::Llm.with_prepared_responses([gist_result]) do + subject.execute({}) + end + + gist = AiSummary.gist.find_by(target: topic_1) + expect(gist.summarized_text).to eq(gist_result) + expect(gist.original_content_sha).to eq(AiSummary.build_sha("123")) + end + end + end + + context "when there is a topic but it doesn't have a hot score" do + it "does nothing" do + subject.execute({}) + + gist = AiSummary.gist.find_by(target: topic_1) + expect(gist).to be_nil + end + end + + context "when there are multiple hot topics" do + fab!(:topic_2) { Fabricate(:topic) } + fab!(:post_2_1) { Fabricate(:post, topic: topic_2, post_number: 1) } + fab!(:post_2_2) { Fabricate(:post, topic: topic_2, post_number: 2) } + + before do + TopicHotScore.create!(topic_id: topic_1.id, score: 0.2) + TopicHotScore.create!(topic_id: topic_2.id, score: 0.4) + end + + it "processes them by score order" do + topic_1_gist = "I'm gist of topic 1" + topic_2_gist = "I'm gist of topic 2" + + DiscourseAi::Completions::Llm.with_prepared_responses([topic_2_gist, topic_1_gist]) do + subject.execute({}) + end + + gist = AiSummary.gist.find_by(target: topic_1) + expect(gist.summarized_text).to eq(topic_1_gist) + + gist_2 = AiSummary.gist.find_by(target: topic_2) + expect(gist_2.summarized_text).to eq(topic_2_gist) + end + end + end +end diff --git a/spec/lib/modules/summarization/strategies/topic_gist_spec.rb b/spec/lib/modules/summarization/strategies/hot_topic_gists_spec.rb similarity index 70% rename from spec/lib/modules/summarization/strategies/topic_gist_spec.rb rename to 
spec/lib/modules/summarization/strategies/hot_topic_gists_spec.rb index ecea03ec6..5eb38e5e6 100644 --- a/spec/lib/modules/summarization/strategies/topic_gist_spec.rb +++ b/spec/lib/modules/summarization/strategies/hot_topic_gists_spec.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -RSpec.describe DiscourseAi::Summarization::Strategies::TopicGist do +RSpec.describe DiscourseAi::Summarization::Strategies::HotTopicGists do subject(:gist) { described_class.new(topic) } fab!(:topic) { Fabricate(:topic, highest_post_number: 25) } @@ -8,22 +8,13 @@ fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) } describe "#targets_data" do - context "when the topic has more than 20 posts" do - before do - offset = 3 # Already created posts 1 and 2 - (topic.highest_post_number - 2).times do |i| - Fabricate(:post, topic: topic, post_number: i + offset) - end - end - - it "includes the OP and the last 20 posts" do - content = gist.targets_data - post_numbers = content[:contents].map { |c| c[:id] } + it "respects the `hot_topics_recent_days` setting" do + post_2.update(created_at: (SiteSetting.hot_topics_recent_days + 1).days.ago) + Fabricate(:post, topic: topic, post_number: 3) - expected = (6..25).to_a << 1 + post_numbers = gist.targets_data[:contents].map { |c| c[:id] } - expect(post_numbers).to contain_exactly(*expected) - end + expect(post_numbers).to contain_exactly(1, 3) end it "only includes visible posts" do From 650a6e881c7aac6a2c4105e5de55c87d554a17b3 Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Fri, 18 Oct 2024 11:51:00 -0300 Subject: [PATCH 3/7] Replace setting with a configurable batch size --- app/jobs/regular/hot_topics_gist_batch.rb | 4 ++-- .../topic-list-main-link-bottom/ai-topic-gist.gjs | 3 +-- .../modules/summarization/common/ai-summary.scss | 4 ++-- config/locales/server.en.yml | 2 +- config/settings.yml | 7 ++++--- lib/summarization/entry_point.rb | 5 +++-- ...ics_gist_batch.rb => hot_topics_gist_batch_spec.rb} | 10 +++++----- 
spec/lib/modules/summarization/entry_point_spec.rb | 4 ++-- 8 files changed, 20 insertions(+), 19 deletions(-) rename spec/jobs/regular/{hot_topics_gist_batch.rb => hot_topics_gist_batch_spec.rb} (92%) diff --git a/app/jobs/regular/hot_topics_gist_batch.rb b/app/jobs/regular/hot_topics_gist_batch.rb index 021e78859..e65131044 100644 --- a/app/jobs/regular/hot_topics_gist_batch.rb +++ b/app/jobs/regular/hot_topics_gist_batch.rb @@ -5,12 +5,12 @@ class HotTopicsGistBatch < ::Jobs::Base def execute(args) return if !SiteSetting.discourse_ai_enabled return if !SiteSetting.ai_summarization_enabled - return if !SiteSetting.ai_summarize_hot_topics_list + return if SiteSetting.ai_summarize_max_hot_topics_gists_per_batch.zero? Topic .joins("JOIN topic_hot_scores on topics.id = topic_hot_scores.topic_id") .order("topic_hot_scores.score DESC") - .limit(100) + .limit(SiteSetting.ai_summarize_max_hot_topics_gists_per_batch) .each do |topic| summarizer = DiscourseAi::Summarization.topic_gist(topic) gist = summarizer.existing_summary diff --git a/assets/javascripts/discourse/connectors/topic-list-main-link-bottom/ai-topic-gist.gjs b/assets/javascripts/discourse/connectors/topic-list-main-link-bottom/ai-topic-gist.gjs index 75ec73baa..5548f3f1a 100644 --- a/assets/javascripts/discourse/connectors/topic-list-main-link-bottom/ai-topic-gist.gjs +++ b/assets/javascripts/discourse/connectors/topic-list-main-link-bottom/ai-topic-gist.gjs @@ -1,8 +1,7 @@ import Component from "@glimmer/component"; -import icon from "discourse-common/helpers/d-icon"; export default class AiTopicGist extends Component { - static shouldRender(outletArgs, helper) { + static shouldRender(outletArgs) { return outletArgs?.topic?.ai_topic_gist && !outletArgs.topic.excerpt; } diff --git a/assets/stylesheets/modules/summarization/common/ai-summary.scss b/assets/stylesheets/modules/summarization/common/ai-summary.scss index ddb20faef..04c5ddaa8 100644 --- 
a/assets/stylesheets/modules/summarization/common/ai-summary.scss +++ b/assets/stylesheets/modules/summarization/common/ai-summary.scss @@ -217,9 +217,9 @@ } .ai-topic-gist { - margin-top: .5em; + margin-top: 0.5em; &__text { font-size: var(--font-down-2); } -} \ No newline at end of file +} diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 9ae9e97fd..ce1e16802 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -84,7 +84,7 @@ en: ai_summarization_model: "Model to use for summarization." ai_custom_summarization_allowed_groups: "Groups allowed to use create new summaries." ai_pm_summarization_allowed_groups: "Groups allowed to create and view summaries in PMs." - ai_summarize_hot_topics_list: "Display a brief summary for each topic in the hot topics list (if available)." + ai_summarize_max_hot_topics_gists_per_batch: "After updating topics in the hot list, we'll generate brief summaries of the first N ones. (Disabled when 0)" ai_bot_enabled: "Enable the AI Bot module." ai_bot_enable_chat_warning: "Display a warning when PM chat is initiated. Can be overriden by editing the translation string: discourse_ai.ai_bot.pm_warning" diff --git a/config/settings.yml b/config/settings.yml index 968a9da4f..cba62a7a8 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -376,9 +376,10 @@ discourse_ai: type: group_list list_type: compact default: "3|13" # 3: @staff, 13: @trust_level_3 - ai_summarize_hot_topics_list: - client: true - default: false + ai_summarize_max_hot_topics_gists_per_batch: + default: 0 + min: 0 + max: 1000 ai_summarization_strategy: # TODO(roman): Deprecated. 
Remove by Sept 2024 type: enum default: "" diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index ccdc3a783..34bb43bb9 100644 --- a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -19,7 +19,7 @@ def inject_into(plugin) plugin.register_modifier(:topic_query_create_list_topics) do |topics, options| if options[:filter] == :hot && SiteSetting.ai_summarization_enabled && - SiteSetting.ai_summarize_hot_topics_list + SiteSetting.ai_summarize_max_hot_topics_gists_per_batch > 0 topics.includes(:ai_summaries).where( "ai_summaries.id IS NULL OR ai_summaries.summary_type = ?", AiSummary.summary_types[:gist], @@ -33,7 +33,8 @@ def inject_into(plugin) :topic_list_item, :ai_topic_gist, include_condition: -> do - SiteSetting.ai_summarization_enabled && SiteSetting.ai_summarize_hot_topics_list + SiteSetting.ai_summarization_enabled && + SiteSetting.ai_summarize_max_hot_topics_gists_per_batch > 0 end, ) { object.ai_summaries.to_a.first&.summarized_text } diff --git a/spec/jobs/regular/hot_topics_gist_batch.rb b/spec/jobs/regular/hot_topics_gist_batch_spec.rb similarity index 92% rename from spec/jobs/regular/hot_topics_gist_batch.rb rename to spec/jobs/regular/hot_topics_gist_batch_spec.rb index 8eab7d8a2..faa137d14 100644 --- a/spec/jobs/regular/hot_topics_gist_batch.rb +++ b/spec/jobs/regular/hot_topics_gist_batch_spec.rb @@ -8,11 +8,11 @@ before do assign_fake_provider_to(:ai_summarization_model) SiteSetting.ai_summarization_enabled = true - SiteSetting.ai_summarize_hot_topics_list = true + SiteSetting.ai_summarize_max_hot_topics_gists_per_batch = 100 end describe "#execute" do - context "When there is a topic with a hot score" do + context "when there is a topic with a hot score" do before { TopicHotScore.create!(topic_id: topic_1.id, score: 0.1) } it "does nothing if the plugin is disabled" do @@ -34,7 +34,7 @@ end it "does nothing if hot topics summarization is disabled" do - SiteSetting.ai_summarize_hot_topics_list = 
false + SiteSetting.ai_summarize_max_hot_topics_gists_per_batch = 0 subject.execute({}) @@ -48,10 +48,10 @@ DiscourseAi::Completions::Llm.with_prepared_responses([gist_result]) { subject.execute({}) } gist = AiSummary.gist.find_by(target: topic_1) - expect(gist.summarized_text).to eq("I'm a gist") + expect(gist.summarized_text).to eq(gist_result) end - context "and we already generated a gist of it" do + context "when we already generated a gist of it" do fab!(:ai_gist) do Fabricate( :topic_ai_gist, diff --git a/spec/lib/modules/summarization/entry_point_spec.rb b/spec/lib/modules/summarization/entry_point_spec.rb index 0ec033172..c84308c83 100644 --- a/spec/lib/modules/summarization/entry_point_spec.rb +++ b/spec/lib/modules/summarization/entry_point_spec.rb @@ -19,7 +19,7 @@ describe "topic_query_create_list_topics modifier" do context "when hot topic summarization is enabled" do - before { SiteSetting.ai_summarize_hot_topics_list = true } + before { SiteSetting.ai_summarize_max_hot_topics_gists_per_batch = 100 } it "preloads only gist summaries" do gist_topic = topic_query.list_hot.topics.find { |t| t.id == topic_ai_gist.target_id } @@ -49,7 +49,7 @@ end context "when hot topics summarization is enabled" do - before { SiteSetting.ai_summarize_hot_topics_list = true } + before { SiteSetting.ai_summarize_max_hot_topics_gists_per_batch = 100 } it "includes the summary" do gist_topic = topic_query.list_hot.topics.find { |t| t.id == topic_ai_gist.target_id } From 6932262d7e1c2953756f9a9a88b3d912842749ee Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Fri, 18 Oct 2024 15:13:20 -0300 Subject: [PATCH 4/7] Avoid loading summaries for other topic lists --- lib/summarization/entry_point.rb | 16 +++++++++++++--- .../modules/summarization/entry_point_spec.rb | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index 34bb43bb9..65a4386ed 100644 --- 
a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -33,14 +33,24 @@ def inject_into(plugin) :topic_list_item, :ai_topic_gist, include_condition: -> do + # Hack(roman): Not ideal but at this point I don't have a better way of knowing if I'm serializing items for the hot filter. + # If the association wasn't loaded, assume don't care about summaries, and including it anyway will result in multiple n+1 queries. + # In the future, the serializer could have more informacion about the topic list, so we don't depend on this. SiteSetting.ai_summarization_enabled && - SiteSetting.ai_summarize_max_hot_topics_gists_per_batch > 0 + SiteSetting.ai_summarize_max_hot_topics_gists_per_batch > 0 && + object.ai_summaries.loaded? end, - ) { object.ai_summaries.to_a.first&.summarized_text } + ) do + summaries = object.ai_summaries.to_a + + # Summaries should always have one element here. + # This is an extra safeguard to avoid including regular summaries. + summaries.find { |s| s.summary_type == "gist" }&.summarized_text + end # To make sure hot topic gists are inmediately up to date, we rely on this event # instead of using a scheduled job. 
- plugin.on(:topic_hot_scores_updated) { Jobs.enqueue(:summarize_hot_topics_batch) } + plugin.on(:topic_hot_scores_updated) { Jobs.enqueue(:hot_topics_gist_batch) } end end end diff --git a/spec/lib/modules/summarization/entry_point_spec.rb b/spec/lib/modules/summarization/entry_point_spec.rb index c84308c83..5ab0c2cdc 100644 --- a/spec/lib/modules/summarization/entry_point_spec.rb +++ b/spec/lib/modules/summarization/entry_point_spec.rb @@ -59,8 +59,26 @@ expect(serialized[:ai_topic_gist]).to be_present end + + it "doesn't include the summary when looking at other topic lists" do + gist_topic = topic_query.list_latest.topics.find { |t| t.id == topic_ai_gist.target_id } + + serialized = + TopicListItemSerializer.new(gist_topic, scope: Guardian.new, root: false).as_json + + expect(serialized[:ai_topic_gist]).to be_nil + end end end end end + + describe "#on topic_hot_scores_updated" do + it "queues a job to generate gists" do + expect { DiscourseEvent.trigger(:topic_hot_scores_updated) }.to change( + Jobs::HotTopicsGistBatch.jobs, + :size, + ).by(1) + end + end end From 94b417c80e2b9673707d9d55a13624386bb0ff0d Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Fri, 18 Oct 2024 16:01:39 -0300 Subject: [PATCH 5/7] Tweak gist prompt to focus on latest posts in the context of the OP --- lib/summarization/entry_point.rb | 4 +- .../strategies/hot_topic_gists.rb | 52 +++++++++++-------- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index 65a4386ed..5ba6aae06 100644 --- a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -34,7 +34,7 @@ def inject_into(plugin) :ai_topic_gist, include_condition: -> do # Hack(roman): Not ideal but at this point I don't have a better way of knowing if I'm serializing items for the hot filter. - # If the association wasn't loaded, assume don't care about summaries, and including it anyway will result in multiple n+1 queries. 
+ # If the association wasn't loaded, assume we don't care about summaries. Including it anyways will result in multiple n+1 queries. # In the future, the serializer could have more informacion about the topic list, so we don't depend on this. SiteSetting.ai_summarization_enabled && SiteSetting.ai_summarize_max_hot_topics_gists_per_batch > 0 && @@ -43,7 +43,7 @@ def inject_into(plugin) ) do summaries = object.ai_summaries.to_a - # Summaries should always have one element here. + # Summaries should always have one or zero elements here. # This is an extra safeguard to avoid including regular summaries. summaries.find { |s| s.summary_type == "gist" }&.summarized_text end diff --git a/lib/summarization/strategies/hot_topic_gists.rb b/lib/summarization/strategies/hot_topic_gists.rb index 74be80b2b..792624836 100644 --- a/lib/summarization/strategies/hot_topic_gists.rb +++ b/lib/summarization/strategies/hot_topic_gists.rb @@ -49,11 +49,11 @@ def concatenation_prompt(texts_to_summarize) You are a summarization bot tasked with creating a single, concise sentence by merging disjointed summaries into a cohesive statement. Your response should strictly be this single, comprehensive sentence, without any additional text or comments. - - Focus on the central theme or issue being addressed, while maintaining an objective and neutral tone. - - Avoid including extraneous details or subjective opinions. - - Maintain the original language of the text being summarized. - - Try to use no more than 20 words. - - Begin the summary directly with the main topic or issue, using clear and direct language without introductory phrases like "The discussion is about..." + - Focus on the central theme or issue being addressed, maintaining an objective and neutral tone. + - Exclude extraneous details or subjective opinions. + - Use the original language of the text. + - Begin directly with the main topic or issue, avoiding introductory phrases. + - Limit the summary to a maximum of 20 words. 
TEXT prompt.push(type: :user, content: <<~TEXT.strip) @@ -68,28 +68,36 @@ def concatenation_prompt(texts_to_summarize) end def summarize_single_prompt(input, opts) + statements = input.split(/(?=\d+\) \w+ said:)/) + prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) - You are an advanced summarization bot. Your task is to analyze a given conversation and generate a single, - concise sentence that clearly conveys the main topic and purpose of the discussion to someone with no prior context. - - - Focus on the central theme or issue being addressed, while maintaining an objective and neutral tone. - - Avoid including extraneous details or subjective opinions. - - Maintain the original language of the text being summarized. - - Begin the summary directly with the main topic or issue, using clear and direct language without introductory phrases like "The discussion is about...". - - The sentence doesn't have to mention the discussion title. - - Aim to use no more than 20 words. + You are an advanced summarization bot. Analyze a given conversation and produce a concise, + single-sentence summary that conveys the main topic and current developments to someone with no prior context. + + ### Guidelines: + + - Emphasize the most recent updates while considering their significance within the original post. + - Focus on the central theme or issue being addressed, maintaining an objective and neutral tone. + - Exclude extraneous details or subjective opinions. + - Use the original language of the text. + - Begin directly with the main topic or issue, avoiding introductory phrases. + - Limit the summary to a maximum of 20 words. TEXT prompt.push(type: :user, content: <<~TEXT.strip) - #{opts[:content_title].present? ? 
"The discussion title is: " + opts[:content_title] + ".\n" : ""} - - Here are the posts, inside XML tags: - - - #{input} - + ### Context: + + The conversation began with the following statement: - Generate a single sentence of the text above maintaining the original language. + #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""} + + #{statements&.pop} + + Subsequent discussion includes the following: + + #{statements&.join} + + Your task is to focus on these latest messages, capturing their meaning in the context of the initial post. TEXT prompt From ecefc93465af90c25ae624149afd80409f90933f Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Fri, 18 Oct 2024 17:05:21 -0300 Subject: [PATCH 6/7] Remove serializer hack and rely on core change from discourse/discourse#29291 --- .discourse-compatibility | 1 + lib/summarization/entry_point.rb | 5 +---- .../modules/summarization/entry_point_spec.rb | 16 +++++++++++++--- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.discourse-compatibility b/.discourse-compatibility index 3bb8c8512..153628c3f 100644 --- a/.discourse-compatibility +++ b/.discourse-compatibility @@ -1,3 +1,4 @@ +< 3.4.0.beta3-dev: ecf1bb49d737ea15308400f22f89d1d1e71d13d < 3.4.0.beta1-dev: 9d887ad4ace8e33c3fe7dbb39237e882c08b4f0b < 3.3.0.beta5-dev: 4d8090002f6dcd8e34d41033606bf131fa221475 < 3.3.0.beta2-dev: 61890b667c06299841ae88946f84a112f00060e1 diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index 5ba6aae06..769b2df44 100644 --- a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -33,12 +33,9 @@ def inject_into(plugin) :topic_list_item, :ai_topic_gist, include_condition: -> do - # Hack(roman): Not ideal but at this point I don't have a better way of knowing if I'm serializing items for the hot filter. - # If the association wasn't loaded, assume we don't care about summaries. Including it anyways will result in multiple n+1 queries. 
- # In the future, the serializer could have more informacion about the topic list, so we don't depend on this. SiteSetting.ai_summarization_enabled && SiteSetting.ai_summarize_max_hot_topics_gists_per_batch > 0 && - object.ai_summaries.loaded? + options[:filter] == :hot end, ) do summaries = object.ai_summaries.to_a diff --git a/spec/lib/modules/summarization/entry_point_spec.rb b/spec/lib/modules/summarization/entry_point_spec.rb index 5ab0c2cdc..b467cb047 100644 --- a/spec/lib/modules/summarization/entry_point_spec.rb +++ b/spec/lib/modules/summarization/entry_point_spec.rb @@ -55,16 +55,26 @@ gist_topic = topic_query.list_hot.topics.find { |t| t.id == topic_ai_gist.target_id } serialized = - TopicListItemSerializer.new(gist_topic, scope: Guardian.new, root: false).as_json + TopicListItemSerializer.new( + gist_topic, + scope: Guardian.new, + root: false, + filter: :hot, + ).as_json expect(serialized[:ai_topic_gist]).to be_present end it "doesn't include the summary when looking at other topic lists" do - gist_topic = topic_query.list_latest.topics.find { |t| t.id == topic_ai_gist.target_id } + gist_topic = topic_query.list_hot.topics.find { |t| t.id == topic_ai_gist.target_id } serialized = - TopicListItemSerializer.new(gist_topic, scope: Guardian.new, root: false).as_json + TopicListItemSerializer.new( + gist_topic, + scope: Guardian.new, + root: false, + filter: :latest, + ).as_json expect(serialized[:ai_topic_gist]).to be_nil end From 8d8c7cd1dfc18d9e490285da0ada3806808a991a Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Fri, 18 Oct 2024 17:07:13 -0300 Subject: [PATCH 7/7] Update lib/summarization/strategies/hot_topic_gists.rb Co-authored-by: Rafael dos Santos Silva --- lib/summarization/strategies/hot_topic_gists.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lib/summarization/strategies/hot_topic_gists.rb b/lib/summarization/strategies/hot_topic_gists.rb index 792624836..5d4f802b5 100644 --- 
a/lib/summarization/strategies/hot_topic_gists.rb +++ b/lib/summarization/strategies/hot_topic_gists.rb @@ -23,6 +23,19 @@ def targets_data .where("created_at >= ?", hot_topics_recent_cutoff) .pluck(:post_number) + # It may happen that a topic is hot without any recent posts + # In that case, we'll just grab the last 20 posts + # for a useful summary of the current state of the topic + if recent_hot_posts.empty? + recent_hot_posts = + Post + .where(topic_id: target.id) + .where("post_type = ?", Post.types[:regular]) + .where("NOT hidden") + .order("post_number DESC") + .limit(20) + .pluck(:post_number) + end posts_data = Post .where(topic_id: target.id)