From bc88e2df4320faf727864828cd9e30c22aaea00c Mon Sep 17 00:00:00 2001
From: Sam Saffron
Date: Wed, 23 Oct 2024 15:20:57 +1100
Subject: [PATCH 1/2] FEATURE: better logging for automation reports

A new feature_context jsonb column was added to ai_api_audit_logs.
This allows us to store rich JSON context on any LLM request made.
The new field now stores the automation id and name.

Additionally, llm_triage can now specify a maximum number of post tokens.
This means you can limit the cost of llm triage by scanning only the first N tokens of a post.
---
 app/models/ai_api_audit_log.rb                |  3 +-
 config/locales/client.en.yml                  |  3 ++
 ...33955_add_feature_context_to_ai_api_log.rb |  7 ++++
 discourse_automation/llm_report.rb            |  1 +
 discourse_automation/llm_triage.rb            |  7 ++++
 lib/automation/llm_triage.rb                  | 19 ++++++++--
 lib/automation/report_runner.rb               |  8 +++-
 lib/completions/endpoints/base.rb             | 10 ++++-
 lib/completions/endpoints/canned_response.rb  |  8 +++-
 lib/completions/endpoints/fake.rb             |  8 +++-
 lib/completions/endpoints/open_ai.rb          |  9 ++++-
 lib/completions/llm.rb                        |  2 +
 spec/lib/completions/llm_spec.rb              | 38 +++++++++++++++++++
 .../discourse_automation/llm_triage_spec.rb   | 38 ++++++++++++++++---
 14 files changed, 147 insertions(+), 14 deletions(-)
 create mode 100644 db/migrate/20241023033955_add_feature_context_to_ai_api_log.rb

diff --git a/app/models/ai_api_audit_log.rb b/app/models/ai_api_audit_log.rb
index c38d68eb4..c2d3c5dd8 100644
--- a/app/models/ai_api_audit_log.rb
+++ b/app/models/ai_api_audit_log.rb
@@ -33,4 +33,5 @@ module Provider
 #  post_id :integer
 #  feature_name :string(255)
 #  language_model :string(255)
-#
+#  feature_context :jsonb
+
diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index a58b987a1..ef24a4368 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -81,6 +81,9 @@ en:
           system_prompt:
             label: "System Prompt"
             description: "The prompt that will be used to triage, be sure for it to reply with a single word you can use to trigger the action"
+          max_post_tokens:
+            label: "Max Post Tokens"
+            description: "The maximum number of tokens to scan using LLM triage"
           search_for_text:
             label: "Search for text"
             description: "If the following text appears in the llm reply, apply this actions"
diff --git a/db/migrate/20241023033955_add_feature_context_to_ai_api_log.rb b/db/migrate/20241023033955_add_feature_context_to_ai_api_log.rb
new file mode 100644
index 000000000..5191541cf
--- /dev/null
+++ b/db/migrate/20241023033955_add_feature_context_to_ai_api_log.rb
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+#
+class AddFeatureContextToAiApiLog < ActiveRecord::Migration[7.1]
+  def change
+    add_column :ai_api_audit_logs, :feature_context, :jsonb
+  end
+end
diff --git a/discourse_automation/llm_report.rb b/discourse_automation/llm_report.rb
index c190af0f3..e309fbbb3 100644
--- a/discourse_automation/llm_report.rb
+++ b/discourse_automation/llm_report.rb
@@ -93,6 +93,7 @@ module DiscourseAutomation::LlmReport
         temperature: temperature,
         top_p: top_p,
         suppress_notifications: suppress_notifications,
+        automation: self.automation,
       )
   rescue => e
     Discourse.warn_exception e, message: "Error running LLM report!"
diff --git a/discourse_automation/llm_triage.rb b/discourse_automation/llm_triage.rb
index ad90d3619..6f03b2290 100644
--- a/discourse_automation/llm_triage.rb
+++ b/discourse_automation/llm_triage.rb
@@ -11,6 +11,7 @@
   field :system_prompt, component: :message, required: false
   field :search_for_text, component: :text, required: true
+  field :max_post_tokens, component: :text
   field :model,
         component: :choices,
         required: true,
@@ -49,6 +50,11 @@
     hide_topic = fields.dig("hide_topic", "value")
     flag_post = fields.dig("flag_post", "value")
     flag_type = fields.dig("flag_type", "value")
+    max_post_tokens = fields.dig("max_post_tokens", "value").to_i
+
+    if max_post_tokens <= 0
+      max_post_tokens = nil
+    end

     begin
       RateLimiter.new(
@@ -77,6 +83,7 @@
         hide_topic: hide_topic,
         flag_post: flag_post,
         flag_type: flag_type.to_s.to_sym,
+        max_post_tokens: max_post_tokens,
         automation: self.automation,
       )
   rescue => e
diff --git a/lib/automation/llm_triage.rb b/lib/automation/llm_triage.rb
index 103119e72..b11350a9f 100644
--- a/lib/automation/llm_triage.rb
+++ b/lib/automation/llm_triage.rb
@@ -15,20 +15,29 @@ def self.handle(
     hide_topic: nil,
     flag_post: nil,
     flag_type: nil,
-    automation: nil
+    automation: nil,
+    max_post_tokens: nil
   )
     if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? &&
          flag_post.blank?
       raise ArgumentError, "llm_triage: no action specified!"
     end

+    llm = DiscourseAi::Completions::Llm.proxy(model)
+
     s_prompt = system_prompt.to_s.sub("%%POST%%", "") # Backwards-compat. We no longer sub this.
     prompt = DiscourseAi::Completions::Prompt.new(s_prompt)
-    prompt.push(type: :user, content: "title: #{post.topic.title}\n#{post.raw}")
+
+    content = "title: #{post.topic.title}\n#{post.raw}"
+
+    if max_post_tokens.present?
+      content = llm.tokenizer.truncate(content, max_post_tokens)
+    end
+
+    prompt.push(type: :user, content: content)

     result = nil
-    llm = DiscourseAi::Completions::Llm.proxy(model)

     result =
       llm.generate(
@@ -37,6 +46,10 @@ def self.handle(
         max_tokens: 700, # ~500 words
         user: Discourse.system_user,
         feature_name: "llm_triage",
+        feature_context: {
+          automation_id: automation&.id,
+          automation_name: automation&.name,
+        }
       )&.strip

     if result.present? && result.downcase.include?(search_for_text.downcase)
diff --git a/lib/automation/report_runner.rb b/lib/automation/report_runner.rb
index 842de0bfe..b961aa120 100644
--- a/lib/automation/report_runner.rb
+++ b/lib/automation/report_runner.rb
@@ -53,7 +53,8 @@ def initialize(
       exclude_tags: nil,
       top_p: 0.1,
       temperature: 0.2,
-      suppress_notifications: false
+      suppress_notifications: false,
+      automation: nil
     )
       @sender = User.find_by(username: sender_username)
       @receivers = User.where(username: receivers)
@@ -90,6 +91,7 @@ def initialize(
       if !@topic_id && !@receivers.present? && !@email_receivers.present?
         raise ArgumentError, "Must specify topic_id or receivers"
       end
+      @automation = automation
     end

     def run!
@@ -153,6 +155,10 @@ def run!
            top_p: @top_p,
            user: Discourse.system_user,
            feature_name: "ai_report",
+            feature_context: {
+              automation_id: @automation&.id,
+              automation_name: @automation&.name,
+            }
          ) do |response|
            print response if Rails.env.development? && @debug_mode
            result << response
diff --git a/lib/completions/endpoints/base.rb b/lib/completions/endpoints/base.rb
index 3782d735d..a0405b42c 100644
--- a/lib/completions/endpoints/base.rb
+++ b/lib/completions/endpoints/base.rb
@@ -56,7 +56,14 @@ def xml_tags_to_strip(dialect)
       []
     end

-    def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
+    def perform_completion!(
+      dialect,
+      user,
+      model_params = {},
+      feature_name: nil,
+      feature_context: nil,
+      &blk
+    )
       allow_tools = dialect.prompt.has_tools?
       model_params = normalize_model_params(model_params)
       orig_blk = blk
@@ -111,6 +118,7 @@ def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &bl
           post_id: dialect.prompt.post_id,
           feature_name: feature_name,
           language_model: llm_model.name,
+          feature_context: feature_context.present? ? feature_context.as_json : nil,
         )

       if !@streaming_mode
diff --git a/lib/completions/endpoints/canned_response.rb b/lib/completions/endpoints/canned_response.rb
index ee0c9e5f0..eaef21da2 100644
--- a/lib/completions/endpoints/canned_response.rb
+++ b/lib/completions/endpoints/canned_response.rb
@@ -23,7 +23,13 @@ def prompt_messages
      dialect.prompt.messages
    end

-    def perform_completion!(dialect, _user, _model_params, feature_name: nil)
+    def perform_completion!(
+      dialect,
+      _user,
+      _model_params,
+      feature_name: nil,
+      feature_context: nil
+    )
      @dialect = dialect
      response = responses[completions]
      if response.nil?
diff --git a/lib/completions/endpoints/fake.rb b/lib/completions/endpoints/fake.rb
index 72d24d574..2beec61a7 100644
--- a/lib/completions/endpoints/fake.rb
+++ b/lib/completions/endpoints/fake.rb
@@ -100,7 +100,13 @@ def self.last_call=(params)
      @last_call = params
    end

-    def perform_completion!(dialect, user, model_params = {}, feature_name: nil)
+    def perform_completion!(
+      dialect,
+      user,
+      model_params = {},
+      feature_name: nil,
+      feature_context: nil
+    )
      self.class.last_call = { dialect: dialect, user: user, model_params: model_params }

      content = self.class.fake_content
diff --git a/lib/completions/endpoints/open_ai.rb b/lib/completions/endpoints/open_ai.rb
index 35b3e724c..a8cff5950 100644
--- a/lib/completions/endpoints/open_ai.rb
+++ b/lib/completions/endpoints/open_ai.rb
@@ -27,7 +27,14 @@ def provider_id
      AiApiAuditLog::Provider::OpenAI
    end

-    def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
+    def perform_completion!(
+      dialect,
+      user,
+      model_params = {},
+      feature_name: nil,
+      feature_context: nil,
+      &blk
+    )
      if dialect.respond_to?(:is_gpt_o?) && dialect.is_gpt_o? && block_given?
        # we need to disable streaming and simulate it
        blk.call "", lambda { |*| }
diff --git a/lib/completions/llm.rb b/lib/completions/llm.rb
index 445bfc199..0d53b4139 100644
--- a/lib/completions/llm.rb
+++ b/lib/completions/llm.rb
@@ -191,6 +191,7 @@ def generate(
       stop_sequences: nil,
       user:,
       feature_name: nil,
+      feature_context: nil,
       &partial_read_blk
     )
       self.class.record_prompt(prompt)
@@ -224,6 +225,7 @@ def generate(
         user,
         model_params,
         feature_name: feature_name,
+        feature_context: feature_context,
         &partial_read_blk
       )
     end
diff --git a/spec/lib/completions/llm_spec.rb b/spec/lib/completions/llm_spec.rb
index e6402b153..2d946e65c 100644
--- a/spec/lib/completions/llm_spec.rb
+++ b/spec/lib/completions/llm_spec.rb
@@ -55,6 +55,44 @@
      expect(log.topic_id).to eq(123)
      expect(log.post_id).to eq(1)
    end
+
+    it "can track feature_name and feature_context" do
+      body = {
+        model: "gpt-3.5-turbo-0301",
+        usage: {
+          prompt_tokens: 337,
+          completion_tokens: 162,
+          total_tokens: 499,
+        },
+        choices: [
+          {
+            message: { role: "assistant", content: "test" },
+            finish_reason: "stop",
+            index: 0,
+          },
+        ],
+      }.to_json
+
+      WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+        status: 200,
+        body: body,
+      )
+
+      result =
+        described_class.proxy("custom:#{model.id}").generate(
+          "Hello",
+          user: user,
+          feature_name: "llm_triage",
+          feature_context: {
+            foo: "bar",
+          },
+        )
+
+      expect(result).to eq("test")
+      log = AiApiAuditLog.order("id desc").first
+      expect(log.feature_name).to eq("llm_triage")
+      expect(log.feature_context).to eq({ "foo" => "bar" })
+    end
  end

  describe "#generate with fake model" do
diff --git a/spec/lib/discourse_automation/llm_triage_spec.rb b/spec/lib/discourse_automation/llm_triage_spec.rb
index 0a8965929..26aea4d44 100644
--- a/spec/lib/discourse_automation/llm_triage_spec.rb
+++ b/spec/lib/discourse_automation/llm_triage_spec.rb
@@ -32,15 +32,31 @@ def add_automation_field(name, value, type: "text")
     add_automation_field("flag_post", true, type: "boolean")
     add_automation_field("canned_reply", "Yo this is a reply")
     add_automation_field("canned_reply_user", reply_user.username, type: "user")
+    add_automation_field("max_post_tokens", 100)
   end

   it "can trigger via automation" do
-    post = Fabricate(:post)
+    post = Fabricate(:post, raw: "hello " * 5000)

-    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
-      automation.running_in_background!
-      automation.trigger!({ "post" => post })
-    end
+    body = {
+      model: "gpt-3.5-turbo-0301",
+      usage: {
+        prompt_tokens: 337,
+        completion_tokens: 162,
+        total_tokens: 499,
+      },
+      choices: [
+        { message: { role: "assistant", content: "bad" }, finish_reason: "stop", index: 0 },
+      ],
+    }.to_json
+
+    WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      body: body,
+    )
+
+    automation.running_in_background!
+    automation.trigger!({ "post" => post })

     topic = post.topic.reload
     expect(topic.category_id).to eq(category.id)
@@ -49,6 +65,18 @@ def add_automation_field(name, value, type: "text")
     reply = topic.posts.order(:post_number).last
     expect(reply.raw).to eq("Yo this is a reply")
     expect(reply.user.id).to eq(reply_user.id)
+
+    ai_log = AiApiAuditLog.order("id desc").first
+    expect(ai_log.feature_name).to eq("llm_triage")
+    expect(ai_log.feature_context).to eq(
+      { "automation_id" => automation.id, "automation_name" => automation.name },
+    )
+
+    count = ai_log.raw_request_payload.scan("hello").size
+    # we could use the exact count here but it can get fragile
+    # as we change tokenizers, this will give us reasonable confidence
+    expect(count).to be <= (100)
+    expect(count).to be > (50)
   end

   it "does not reply to the canned_reply_user" do

From a33b705d2fc5f5e4348a6002287264fd7c10852f Mon Sep 17 00:00:00 2001
From: Sam Saffron
Date: Wed, 23 Oct 2024 15:27:23 +1100
Subject: [PATCH 2/2] lint

---
 app/models/ai_api_audit_log.rb     | 1 -
 discourse_automation/llm_triage.rb | 4 +---
 lib/automation/llm_triage.rb       | 7 ++-----
 lib/automation/report_runner.rb    | 2 +-
 spec/lib/completions/llm_spec.rb   | 6 +-----
 5 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/app/models/ai_api_audit_log.rb b/app/models/ai_api_audit_log.rb
index c2d3c5dd8..2fa9f5c37 100644
--- a/app/models/ai_api_audit_log.rb
+++ b/app/models/ai_api_audit_log.rb
@@ -34,4 +34,3 @@ module Provider
 #  feature_name :string(255)
 #  language_model :string(255)
 #  feature_context :jsonb
-
diff --git a/discourse_automation/llm_triage.rb b/discourse_automation/llm_triage.rb
index 6f03b2290..6ef370793 100644
--- a/discourse_automation/llm_triage.rb
+++ b/discourse_automation/llm_triage.rb
@@ -52,9 +52,7 @@
     flag_type = fields.dig("flag_type", "value")
     max_post_tokens = fields.dig("max_post_tokens", "value").to_i

-    if max_post_tokens <= 0
-      max_post_tokens = nil
-    end
+    max_post_tokens = nil if max_post_tokens <= 0

     begin
       RateLimiter.new(
diff --git a/lib/automation/llm_triage.rb b/lib/automation/llm_triage.rb
index b11350a9f..253a27243 100644
--- a/lib/automation/llm_triage.rb
+++ b/lib/automation/llm_triage.rb
@@ -30,15 +30,12 @@ def self.handle(

     content = "title: #{post.topic.title}\n#{post.raw}"

-    if max_post_tokens.present?
-      content = llm.tokenizer.truncate(content, max_post_tokens)
-    end
+    content = llm.tokenizer.truncate(content, max_post_tokens) if max_post_tokens.present?

     prompt.push(type: :user, content: content)

     result = nil
-
     result =
       llm.generate(
         prompt,
@@ -49,7 +46,7 @@ def self.handle(
         feature_context: {
           automation_id: automation&.id,
           automation_name: automation&.name,
-        }
+        },
       )&.strip

     if result.present? && result.downcase.include?(search_for_text.downcase)
diff --git a/lib/automation/report_runner.rb b/lib/automation/report_runner.rb
index b961aa120..02363a45b 100644
--- a/lib/automation/report_runner.rb
+++ b/lib/automation/report_runner.rb
@@ -158,7 +158,7 @@ def run!
            feature_context: {
              automation_id: @automation&.id,
              automation_name: @automation&.name,
-            }
+            },
          ) do |response|
            print response if Rails.env.development? && @debug_mode
            result << response
diff --git a/spec/lib/completions/llm_spec.rb b/spec/lib/completions/llm_spec.rb
index 2d946e65c..4f22c16fb 100644
--- a/spec/lib/completions/llm_spec.rb
+++ b/spec/lib/completions/llm_spec.rb
@@ -65,11 +65,7 @@
          total_tokens: 499,
        },
        choices: [
-          {
-            message: { role: "assistant", content: "test" },
-            finish_reason: "stop",
-            index: 0,
-          },
+          { message: { role: "assistant", content: "test" }, finish_reason: "stop", index: 0 },
        ],
      }.to_json
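
Note (reviewer sketch, not part of either patch): the snippet below illustrates how the new
feature_context parameter introduced above flows from Llm#generate into AiApiAuditLog,
mirroring the calls used in the specs. The llm_model record, prompt text, and context
values are placeholder assumptions, not code from the patches.

  # Assumed setup: llm_model is an existing LlmModel record; the prompt string and
  # the feature_context values below are illustrative only.
  llm = DiscourseAi::Completions::Llm.proxy("custom:#{llm_model.id}")

  result =
    llm.generate(
      "Is this post spam? Reply with a single word.",
      user: Discourse.system_user,
      feature_name: "llm_triage",
      feature_context: {
        automation_id: 42,
        automation_name: "spam triage",
      },
    )

  # The endpoint stores the context via as_json, so it comes back from the
  # jsonb column with string keys, as the spec expectations show.
  log = AiApiAuditLog.order("id desc").first
  log.feature_context # => { "automation_id" => 42, "automation_name" => "spam triage" }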