Merged
2 changes: 1 addition & 1 deletion app/models/ai_api_audit_log.rb
@@ -33,4 +33,4 @@ module Provider
# post_id :integer
# feature_name :string(255)
# language_model :string(255)
#
# feature_context :jsonb
3 changes: 3 additions & 0 deletions config/locales/client.en.yml
@@ -81,6 +81,9 @@ en:
system_prompt:
label: "System Prompt"
description: "The prompt that will be used to triage, be sure for it to reply with a single word you can use to trigger the action"
max_post_tokens:
label: "Max Post Tokens"
description: "The maximum number of tokens to scan using LLM triage"
search_for_text:
label: "Search for text"
description: "If the following text appears in the llm reply, apply this actions"
@@ -0,0 +1,7 @@
# frozen_string_literal: true
#
class AddFeatureContextToAiApiLog < ActiveRecord::Migration[7.1]
def change
add_column :ai_api_audit_logs, :feature_context, :jsonb
end
end
1 change: 1 addition & 0 deletions discourse_automation/llm_report.rb
@@ -93,6 +93,7 @@ module DiscourseAutomation::LlmReport
temperature: temperature,
top_p: top_p,
suppress_notifications: suppress_notifications,
automation: self.automation,
)
rescue => e
Discourse.warn_exception e, message: "Error running LLM report!"
5 changes: 5 additions & 0 deletions discourse_automation/llm_triage.rb
@@ -11,6 +11,7 @@

field :system_prompt, component: :message, required: false
field :search_for_text, component: :text, required: true
field :max_post_tokens, component: :text
field :model,
component: :choices,
required: true,
@@ -49,6 +50,9 @@
hide_topic = fields.dig("hide_topic", "value")
flag_post = fields.dig("flag_post", "value")
flag_type = fields.dig("flag_type", "value")
max_post_tokens = fields.dig("max_post_tokens", "value").to_i

max_post_tokens = nil if max_post_tokens <= 0

begin
RateLimiter.new(
@@ -77,6 +81,7 @@
hide_topic: hide_topic,
flag_post: flag_post,
flag_type: flag_type.to_s.to_sym,
max_post_tokens: max_post_tokens,
automation: self.automation,
)
rescue => e
18 changes: 14 additions & 4 deletions lib/automation/llm_triage.rb
@@ -15,20 +15,26 @@ def self.handle(
hide_topic: nil,
flag_post: nil,
flag_type: nil,
automation: nil
automation: nil,
max_post_tokens: nil
)
if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? &&
flag_post.blank?
raise ArgumentError, "llm_triage: no action specified!"
end

llm = DiscourseAi::Completions::Llm.proxy(model)

s_prompt = system_prompt.to_s.sub("%%POST%%", "") # Backwards-compat. We no longer sub this.
prompt = DiscourseAi::Completions::Prompt.new(s_prompt)
prompt.push(type: :user, content: "title: #{post.topic.title}\n#{post.raw}")

result = nil
content = "title: #{post.topic.title}\n#{post.raw}"

llm = DiscourseAi::Completions::Llm.proxy(model)
content = llm.tokenizer.truncate(content, max_post_tokens) if max_post_tokens.present?

prompt.push(type: :user, content: content)

result = nil

result =
llm.generate(
@@ -37,6 +43,10 @@ def self.handle(
max_tokens: 700, # ~500 words
user: Discourse.system_user,
feature_name: "llm_triage",
feature_context: {
automation_id: automation&.id,
automation_name: automation&.name,
},
)&.strip

if result.present? && result.downcase.include?(search_for_text.downcase)
8 changes: 7 additions & 1 deletion lib/automation/report_runner.rb
@@ -53,7 +53,8 @@ def initialize(
exclude_tags: nil,
top_p: 0.1,
temperature: 0.2,
suppress_notifications: false
suppress_notifications: false,
automation: nil
)
@sender = User.find_by(username: sender_username)
@receivers = User.where(username: receivers)
@@ -90,6 +91,7 @@ def initialize(
if !@topic_id && !@receivers.present? && !@email_receivers.present?
raise ArgumentError, "Must specify topic_id or receivers"
end
@automation = automation
end

def run!
@@ -153,6 +155,10 @@ def run!
top_p: @top_p,
user: Discourse.system_user,
feature_name: "ai_report",
feature_context: {
automation_id: @automation&.id,
automation_name: @automation&.name,
},
@lis2 (Contributor) commented on Oct 23, 2024:

Minor but maybe feature_context should be an empty hash and only merge automation_id and automation_name when @automation exists?

Member Author replied:

I am so on the fence here ... automation being nil is kind of a usage bug so the odds of this happening is just tiny.

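For reference, a minimal sketch of the conditional-merge variant the reviewer suggests, written against the run! call site in lib/automation/report_runner.rb shown above (the prompt local and the surrounding argument list are assumed and abbreviated; the merged PR keeps the unconditional hash instead):

# Sketch of the reviewer's suggestion: start from an empty hash and only
# record automation details when @automation is present, so a missing
# automation never writes nil ids into the audit log.
feature_context = {}
if @automation
  feature_context.merge!(automation_id: @automation.id, automation_name: @automation.name)
end

llm.generate(
  prompt, # other arguments elided for brevity
  top_p: @top_p,
  user: Discourse.system_user,
  feature_name: "ai_report",
  feature_context: feature_context,
) do |response|
  result << response
end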
) do |response|
print response if Rails.env.development? && @debug_mode
result << response
10 changes: 9 additions & 1 deletion lib/completions/endpoints/base.rb
@@ -56,7 +56,14 @@ def xml_tags_to_strip(dialect)
[]
end

def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
def perform_completion!(
dialect,
user,
model_params = {},
feature_name: nil,
feature_context: nil,
&blk
)
allow_tools = dialect.prompt.has_tools?
model_params = normalize_model_params(model_params)
orig_blk = blk
@@ -111,6 +118,7 @@ def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &bl
post_id: dialect.prompt.post_id,
feature_name: feature_name,
language_model: llm_model.name,
feature_context: feature_context.present? ? feature_context.as_json : nil,
)

if !@streaming_mode
8 changes: 7 additions & 1 deletion lib/completions/endpoints/canned_response.rb
@@ -23,7 +23,13 @@ def prompt_messages
dialect.prompt.messages
end

def perform_completion!(dialect, _user, _model_params, feature_name: nil)
def perform_completion!(
dialect,
_user,
_model_params,
feature_name: nil,
feature_context: nil
)
@dialect = dialect
response = responses[completions]
if response.nil?
8 changes: 7 additions & 1 deletion lib/completions/endpoints/fake.rb
@@ -100,7 +100,13 @@ def self.last_call=(params)
@last_call = params
end

def perform_completion!(dialect, user, model_params = {}, feature_name: nil)
def perform_completion!(
dialect,
user,
model_params = {},
feature_name: nil,
feature_context: nil
)
self.class.last_call = { dialect: dialect, user: user, model_params: model_params }

content = self.class.fake_content
9 changes: 8 additions & 1 deletion lib/completions/endpoints/open_ai.rb
@@ -27,7 +27,14 @@ def provider_id
AiApiAuditLog::Provider::OpenAI
end

def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
def perform_completion!(
dialect,
user,
model_params = {},
feature_name: nil,
feature_context: nil,
&blk
)
if dialect.respond_to?(:is_gpt_o?) && dialect.is_gpt_o? && block_given?
# we need to disable streaming and simulate it
blk.call "", lambda { |*| }
2 changes: 2 additions & 0 deletions lib/completions/llm.rb
@@ -191,6 +191,7 @@ def generate(
stop_sequences: nil,
user:,
feature_name: nil,
feature_context: nil,
&partial_read_blk
)
self.class.record_prompt(prompt)
@@ -224,6 +225,7 @@
user,
model_params,
feature_name: feature_name,
feature_context: feature_context,
&partial_read_blk
)
end
34 changes: 34 additions & 0 deletions spec/lib/completions/llm_spec.rb
@@ -55,6 +55,40 @@
expect(log.topic_id).to eq(123)
expect(log.post_id).to eq(1)
end

it "can track feature_name and feature_context" do
body = {
model: "gpt-3.5-turbo-0301",
usage: {
prompt_tokens: 337,
completion_tokens: 162,
total_tokens: 499,
},
choices: [
{ message: { role: "assistant", content: "test" }, finish_reason: "stop", index: 0 },
],
}.to_json

WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
status: 200,
body: body,
)

result =
described_class.proxy("custom:#{model.id}").generate(
"Hello",
user: user,
feature_name: "llm_triage",
feature_context: {
foo: "bar",
},
)

expect(result).to eq("test")
log = AiApiAuditLog.order("id desc").first
expect(log.feature_name).to eq("llm_triage")
expect(log.feature_context).to eq({ "foo" => "bar" })
end
end

describe "#generate with fake model" do
38 changes: 33 additions & 5 deletions spec/lib/discourse_automation/llm_triage_spec.rb
@@ -32,15 +32,31 @@ def add_automation_field(name, value, type: "text")
add_automation_field("flag_post", true, type: "boolean")
add_automation_field("canned_reply", "Yo this is a reply")
add_automation_field("canned_reply_user", reply_user.username, type: "user")
add_automation_field("max_post_tokens", 100)
end

it "can trigger via automation" do
post = Fabricate(:post)
post = Fabricate(:post, raw: "hello " * 5000)

DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
automation.running_in_background!
automation.trigger!({ "post" => post })
end
body = {
model: "gpt-3.5-turbo-0301",
usage: {
prompt_tokens: 337,
completion_tokens: 162,
total_tokens: 499,
},
choices: [
{ message: { role: "assistant", content: "bad" }, finish_reason: "stop", index: 0 },
],
}.to_json

WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
status: 200,
body: body,
)

automation.running_in_background!
automation.trigger!({ "post" => post })

topic = post.topic.reload
expect(topic.category_id).to eq(category.id)
@@ -49,6 +65,18 @@ def add_automation_field(name, value, type: "text")
reply = topic.posts.order(:post_number).last
expect(reply.raw).to eq("Yo this is a reply")
expect(reply.user.id).to eq(reply_user.id)

ai_log = AiApiAuditLog.order("id desc").first
expect(ai_log.feature_name).to eq("llm_triage")
expect(ai_log.feature_context).to eq(
{ "automation_id" => automation.id, "automation_name" => automation.name },
)

count = ai_log.raw_request_payload.scan("hello").size
# we could use the exact count here but it can get fragile
# as we change tokenizers, this will give us reasonable confidence
expect(count).to be <= (100)
expect(count).to be > (50)
end

it "does not reply to the canned_reply_user" do