This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Merged
@@ -26,7 +26,7 @@ def show
strategy = DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, since)

summarized_text =
if strategy.targets_data[:contents].empty?
if strategy.targets_data.empty?
I18n.t("discourse_ai.summarization.chat.no_targets")
else
summarizer.summarize(current_user)&.summarized_text
141 changes: 46 additions & 95 deletions lib/summarization/fold_content.rb
@@ -18,43 +18,26 @@ def initialize(llm, strategy, persist_summaries: true)
attr_reader :llm, :strategy

# @param user { User } - User object used for auditing usage.
#
# @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
# Note: The block is only called with results of the final summary, not intermediate summaries.
#
# @returns { AiSummary } - Resulting summary.
def summarize(user, &on_partial_blk)
opts = content_to_summarize.except(:contents)

initial_chunks =
rebalance_chunks(
content_to_summarize[:contents].map do |c|
{ ids: [c[:id]], summary: format_content_item(c) }
end,
)

# Special case where we can do all the summarization in one pass.
result =
if initial_chunks.length == 1
{
summary:
summarize_single(initial_chunks.first[:summary], user, opts, &on_partial_blk),
chunks: [],
}
else
summarize_chunks(initial_chunks, user, opts, &on_partial_blk)
end
base_summary = ""
initial_pos = 0
folded_summary =
fold(content_to_summarize, base_summary, initial_pos, user, &on_partial_blk)

clean_summary =
Nokogiri::HTML5.fragment(result[:summary]).css("ai")&.first&.text || result[:summary]
Nokogiri::HTML5.fragment(folded_summary).css("ai")&.first&.text || folded_summary

if persist_summaries
AiSummary.store!(
strategy.target,
strategy.type,
llm_model.name,
clean_summary,
content_to_summarize[:contents].map { |c| c[:id] },
content_to_summarize.map { |c| c[:id] },
)
else
AiSummary.new(summarized_text: clean_summary)
@@ -96,90 +79,58 @@ def content_to_summarize
end

def latest_sha
@latest_sha ||= AiSummary.build_sha(content_to_summarize[:contents].map { |c| c[:id] }.join)
@latest_sha ||= AiSummary.build_sha(content_to_summarize.map { |c| c[:id] }.join)
end

def summarize_chunks(chunks, user, opts, &on_partial_blk)
# Safely assume we always have more than one chunk.
summarized_chunks = summarize_in_chunks(chunks, user, opts)
total_summaries_size =
llm_model.tokenizer_class.size(summarized_chunks.map { |s| s[:summary].to_s }.join)

if total_summaries_size < available_tokens
# Chunks are small enough, we can concatenate them.
{
summary:
concatenate_summaries(
summarized_chunks.map { |s| s[:summary] },
user,
&on_partial_blk
),
chunks: summarized_chunks,
}
else
# We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again.
rebalanced_chunks = rebalance_chunks(summarized_chunks)
# @param items { Array<Hash> } - Content to summarize. Structure will be: { poster: who wrote the content, id: a way to order content, text: content }
# @param summary { String } - Intermediate summaries that we'll keep extending as part of our "folding" algorithm.
# @param cursor { Integer } - Index tracking how much of the content we have already summarized.
# @param user { User } - User object used for auditing usage.
# @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
# Note: The block is only called with results of the final summary, not intermediate summaries.
#
# The summarization algorithm.
# The idea is to build an initial summary packing in as much content as we can. Once we have it, we keep extending it with the leftover
# content until there is nothing left.
#
# @returns { String } - Resulting summary.
def fold(items, summary, cursor, user, &on_partial_blk)
tokenizer = llm_model.tokenizer_class
tokens_left = available_tokens - tokenizer.size(summary)
iteration_content = []

summarize_chunks(rebalanced_chunks, user, opts, &on_partial_blk)
end
end
items.each_with_index do |item, idx|
next if idx < cursor

def format_content_item(item)
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
as_text = "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "

def rebalance_chunks(chunks)
section = { ids: [], summary: "" }

chunks =
chunks.reduce([]) do |sections, chunk|
if llm_model.tokenizer_class.can_expand_tokens?(
section[:summary],
chunk[:summary],
available_tokens,
)
section[:summary] += chunk[:summary]
section[:ids] = section[:ids].concat(chunk[:ids])
else
sections << section
section = chunk
end

sections
if tokenizer.below_limit?(as_text, tokens_left)
iteration_content << item
tokens_left -= tokenizer.size(as_text)
cursor += 1
else
break
end
end

chunks << section if section[:summary].present?

chunks
end

def summarize_single(text, user, opts, &on_partial_blk)
prompt = strategy.summarize_single_prompt(text, opts)

llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
end

def summarize_in_chunks(chunks, user, opts)
chunks.map do |chunk|
prompt = strategy.summarize_single_prompt(chunk[:summary], opts)

chunk[:summary] = llm.generate(
prompt,
user: user,
max_tokens: 300,
feature_name: "summarize",
prompt =
(
if summary.blank?
strategy.first_summary_prompt(iteration_content)
else
strategy.summary_extension_prompt(summary, iteration_content)
end
)

chunk
if cursor == items.length
llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
else
latest_summary =
llm.generate(prompt, user: user, max_tokens: 600, feature_name: "summarize")
fold(items, latest_summary, cursor, user, &on_partial_blk)
end
end

def concatenate_summaries(texts_to_summarize, user, &on_partial_blk)
prompt = strategy.concatenation_prompt(texts_to_summarize)

llm.generate(prompt, user: user, &on_partial_blk)
end

def available_tokens
# Reserve tokens for the response and the base prompt
# ~500 words
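The folding loop in `fold_content.rb` above can be sketched standalone. This is an illustrative reduction only: `count_tokens` and `summarize` below are crude stand-ins (word counting and a fake "compress to one token" digest), not the plugin's tokenizer or LLM API.

```ruby
# Minimal, self-contained sketch of the folding algorithm: pack items into
# the token budget, summarize them, then recurse to extend the summary with
# the leftover items.

def count_tokens(text)
  text.split.size # word count as a stand-in tokenizer
end

def summarize(summary, items)
  # Pretend the LLM compresses everything seen so far into one short token.
  covered = summary[/\d+/].to_i + items.size
  "summary-of-#{covered}-items"
end

def fold(items, summary = "", cursor = 0, budget: 50)
  tokens_left = budget - count_tokens(summary)
  batch = []

  items[cursor..].each do |item|
    break if count_tokens(item) > tokens_left

    batch << item
    tokens_left -= count_tokens(item)
    cursor += 1
  end

  # Guard the sketch against looping when nothing fits; the real code avoids
  # this by capping intermediate summaries (max_tokens: 600) well below budget.
  raise "summary leaves no room to fold further" if batch.empty?

  summary = summarize(summary, batch)
  cursor == items.length ? summary : fold(items, summary, cursor, budget: budget)
end
```

With a budget of 3 "tokens" and five one-word items, the first pass packs three items, and the recursion folds in the remaining two on top of the intermediate summary.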
25 changes: 7 additions & 18 deletions lib/summarization/strategies/base.rb
@@ -11,46 +11,35 @@ def initialize(target)
@target = target
end

attr_reader :target
attr_reader :target, :opts

# The summary type differentiates instances of `AiSummary` pointing to a single target.
# See the `summary_type` enum for available options.
def type
raise NotImplementedError
end

# @returns { Hash } - Content to summarize.
# @returns { Array<Hash> } - Content to summarize.
#
# This method returns a hash with the content to summarize and additional information.
# The only mandatory key is `contents`, which must be an array of hashes with
# the following structure:
# This method returns an array of hashes with the content to summarize using the following structure:
#
# {
# poster: A way to tell who wrote the content,
# id: A number to signal order,
# text: Text to summarize
# }
#
# Additionally, you could add more context, which will be available in the prompt. e.g.:
#
# {
# resource_path: "#{Discourse.base_path}/t/-/#{target.id}",
# content_title: target.title,
# contents: [...]
# }
#
def targets_data
raise NotImplementedError
end

# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when concatenating multiple chunks.
def contatenation_prompt(_texts_to_summarize)
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when extending an existing summary.
def summary_extension_prompt(_summary, _texts_to_summarize)
raise NotImplementedError
end

# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM on each chunk,
# and when the whole content fits in one call.
def summarize_single_prompt(_input, _opts)
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM for summarizing a single chunk of content.
def first_summary_prompt(_input)
raise NotImplementedError
end
end
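The contract in `base.rb` above can be exercised with a stub. Everything here is illustrative: the stub `Base` only mirrors the two abstract readers shown in the hunk, and `NotesStrategy` with its sample data is invented for the sketch, not part of the plugin.

```ruby
# Self-contained sketch of the strategy contract: a stub Base mirroring the
# interface above, plus a minimal subclass returning the required
# Array<Hash> shape.
class Base
  def initialize(target)
    @target = target
  end

  attr_reader :target

  def type
    raise NotImplementedError
  end

  # Must return an Array<Hash> shaped like { poster:, id:, text: }.
  def targets_data
    raise NotImplementedError
  end
end

class NotesStrategy < Base
  def type
    :notes
  end

  def targets_data
    [
      { id: 1, poster: "alice", text: "Draft the Q3 report" },
      { id: 2, poster: "bob", text: "Reviewed and left comments" },
    ]
  end
end
```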
52 changes: 37 additions & 15 deletions lib/summarization/strategies/chat_messages.rb
@@ -14,38 +14,60 @@ def initialize(target, since)
end

def targets_data
content = { content_title: target.name }

content[:contents] = target
target
.chat_messages
.where("chat_messages.created_at > ?", since.hours.ago)
.includes(:user)
.order(created_at: :asc)
.pluck(:id, :username_lower, :message)
.map { { id: _1, poster: _2, text: _3 } }

content
end

def contatenation_prompt(texts_to_summarize)
def summary_extension_prompt(summary, contents)
input =
contents
.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
.join("\n")

prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot tasked with creating a cohesive narrative by intelligently merging multiple disjointed summaries.
Your response should consist of well-structured paragraphs that combines these summaries into a clear and comprehensive overview.
Avoid adding any additional text or commentary. Format your output using Discourse forum Markdown.
You are a summarization bot tasked with expanding on an existing summary by incorporating new chat messages.
Your goal is to seamlessly integrate the additional information into the existing summary, preserving the clarity and insights of the original while reflecting any new developments, themes, or conclusions.
Analyze the new messages to identify key themes, participants' intentions, and any significant decisions or resolutions.
Update the summary to include these aspects in a way that remains concise, comprehensive, and accessible to someone with no prior context of the conversation.

### Guidelines:

- Merge the new information naturally with the existing summary without redundancy.
- Only include the updated summary, WITHOUT additional commentary.
- Don't mention the channel title. Avoid extraneous details or subjective opinions.
- Maintain the original language of the text being summarized.
- The same user could write multiple messages in a row, don't treat them as different persons.
- Aim for summaries to be extended by a reasonable amount, but strive to maintain a total length of 400 words or less, unless absolutely necessary for comprehensiveness.

TEXT

prompt.push(type: :user, content: <<~TEXT.strip)
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:
### Context:

This is the existing summary:

#{summary}

<input>
#{texts_to_summarize.join("\n")}
</input>
These are the new chat messages:

#{input}

Integrate the new messages into the existing summary.
TEXT

prompt
end
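The per-message rendering that both prompts in this file rely on can be checked in isolation. The `render_items` helper name is illustrative; in the strategy this is an inline map/join.

```ruby
# Standalone copy of the per-message rendering used when packing chat items
# into a prompt: each item becomes "(id poster said: text ", joined by
# newlines.
def render_items(contents)
  contents
    .map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
    .join("\n")
end
```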

def summarize_single_prompt(input, opts)
def first_summary_prompt(contents)
content_title = target.name
input =
contents.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }.join

prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot designed to generate clear and insightful paragraphs that convey the main topics
and developments from a series of chat messages within a user-selected time window.
@@ -62,7 +84,7 @@ def summarize_single_prompt(input, opts)
TEXT

prompt.push(type: :user, content: <<~TEXT.strip)
#{opts[:content_title].present? ? "The name of the channel is: " + opts[:content_title] + ".\n" : ""}
#{content_title.present? ? "The name of the channel is: " + content_title + ".\n" : ""}

Here are the messages, inside <input></input> XML tags:
