From bc88e2df4320faf727864828cd9e30c22aaea00c Mon Sep 17 00:00:00 2001
From: Sam Saffron
Date: Wed, 23 Oct 2024 15:20:57 +1100
Subject: [PATCH 1/2] FEATURE: better logging for automation reports

A new feature_context jsonb column was added to ai_api_audit_logs.
This allows us to store rich JSON context on any LLM request made.
The new field now stores the automation id and name.

Additionally, llm_triage can now specify a maximum number of post tokens.
This means you can limit the cost of llm triage by scanning only the first N tokens of a post.
---
 app/models/ai_api_audit_log.rb                |  3 +-
 config/locales/client.en.yml                  |  3 ++
 ...33955_add_feature_context_to_ai_api_log.rb |  7 ++++
 discourse_automation/llm_report.rb            |  1 +
 discourse_automation/llm_triage.rb            |  7 ++++
 lib/automation/llm_triage.rb                  | 19 ++++++++--
 lib/automation/report_runner.rb               |  8 +++-
 lib/completions/endpoints/base.rb             | 10 ++++-
 lib/completions/endpoints/canned_response.rb  |  8 +++-
 lib/completions/endpoints/fake.rb             |  8 +++-
 lib/completions/endpoints/open_ai.rb          |  9 ++++-
 lib/completions/llm.rb                        |  2 +
 spec/lib/completions/llm_spec.rb              | 38 +++++++++++++++++++
 .../discourse_automation/llm_triage_spec.rb   | 38 ++++++++++++++++---
 14 files changed, 147 insertions(+), 14 deletions(-)
 create mode 100644 db/migrate/20241023033955_add_feature_context_to_ai_api_log.rb

diff --git a/app/models/ai_api_audit_log.rb b/app/models/ai_api_audit_log.rb
index c38d68eb4..c2d3c5dd8 100644
--- a/app/models/ai_api_audit_log.rb
+++ b/app/models/ai_api_audit_log.rb
@@ -33,4 +33,5 @@ module Provider
 #  post_id :integer
 #  feature_name :string(255)
 #  language_model :string(255)
-#
+#  feature_context :jsonb
+
diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index a58b987a1..ef24a4368 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -81,6 +81,9 @@ en:
           system_prompt:
             label: "System Prompt"
             description: "The prompt that will be used to triage, be sure for it to reply with a single word you can use to trigger the action"
+          max_post_tokens:
+            label: "Max Post Tokens"
+            description: "The maximum number of tokens to scan using LLM triage"
           search_for_text:
             label: "Search for text"
             description: "If the following text appears in the llm reply, apply this actions"
diff --git a/db/migrate/20241023033955_add_feature_context_to_ai_api_log.rb b/db/migrate/20241023033955_add_feature_context_to_ai_api_log.rb
new file mode 100644
index 000000000..5191541cf
--- /dev/null
+++ b/db/migrate/20241023033955_add_feature_context_to_ai_api_log.rb
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+#
+class AddFeatureContextToAiApiLog < ActiveRecord::Migration[7.1]
+  def change
+    add_column :ai_api_audit_logs, :feature_context, :jsonb
+  end
+end
diff --git a/discourse_automation/llm_report.rb b/discourse_automation/llm_report.rb
index c190af0f3..e309fbbb3 100644
--- a/discourse_automation/llm_report.rb
+++ b/discourse_automation/llm_report.rb
@@ -93,6 +93,7 @@ module DiscourseAutomation::LlmReport
         temperature: temperature,
         top_p: top_p,
         suppress_notifications: suppress_notifications,
+        automation: self.automation,
       )
   rescue => e
     Discourse.warn_exception e, message: "Error running LLM report!"
diff --git a/discourse_automation/llm_triage.rb b/discourse_automation/llm_triage.rb
index ad90d3619..6f03b2290 100644
--- a/discourse_automation/llm_triage.rb
+++ b/discourse_automation/llm_triage.rb
@@ -11,6 +11,7 @@
   field :system_prompt, component: :message, required: false
   field :search_for_text, component: :text, required: true
+  field :max_post_tokens, component: :text
   field :model,
         component: :choices,
         required: true,
@@ -49,6 +50,11 @@
     hide_topic = fields.dig("hide_topic", "value")
     flag_post = fields.dig("flag_post", "value")
     flag_type = fields.dig("flag_type", "value")
+    max_post_tokens = fields.dig("max_post_tokens", "value").to_i
+
+    if max_post_tokens <= 0
+      max_post_tokens = nil
+    end

     begin
       RateLimiter.new(
@@ -77,6 +83,7 @@
         hide_topic: hide_topic,
         flag_post: flag_post,
         flag_type: flag_type.to_s.to_sym,
+        max_post_tokens: max_post_tokens,
         automation: self.automation,
       )
   rescue => e
diff --git a/lib/automation/llm_triage.rb b/lib/automation/llm_triage.rb
index 103119e72..b11350a9f 100644
--- a/lib/automation/llm_triage.rb
+++ b/lib/automation/llm_triage.rb
@@ -15,20 +15,29 @@ def self.handle(
     hide_topic: nil,
     flag_post: nil,
     flag_type: nil,
-    automation: nil
+    automation: nil,
+    max_post_tokens: nil
   )
     if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? &&
          flag_post.blank?
       raise ArgumentError, "llm_triage: no action specified!"
     end

+    llm = DiscourseAi::Completions::Llm.proxy(model)
+
     s_prompt = system_prompt.to_s.sub("%%POST%%", "") # Backwards-compat. We no longer sub this.
     prompt = DiscourseAi::Completions::Prompt.new(s_prompt)
-    prompt.push(type: :user, content: "title: #{post.topic.title}\n#{post.raw}")
+
+    content = "title: #{post.topic.title}\n#{post.raw}"
+
+    if max_post_tokens.present?
+      content = llm.tokenizer.truncate(content, max_post_tokens)
+    end
+
+    prompt.push(type: :user, content: content)

     result = nil
-    llm = DiscourseAi::Completions::Llm.proxy(model)

     result =
       llm.generate(
@@ -37,6 +46,10 @@ def self.handle(
         max_tokens: 700, # ~500 words
         user: Discourse.system_user,
         feature_name: "llm_triage",
+        feature_context: {
+          automation_id: automation&.id,
+          automation_name: automation&.name,
+        }
       )&.strip

     if result.present? && result.downcase.include?(search_for_text.downcase)
diff --git a/lib/automation/report_runner.rb b/lib/automation/report_runner.rb
index 842de0bfe..b961aa120 100644
--- a/lib/automation/report_runner.rb
+++ b/lib/automation/report_runner.rb
@@ -53,7 +53,8 @@ def initialize(
       exclude_tags: nil,
       top_p: 0.1,
       temperature: 0.2,
-      suppress_notifications: false
+      suppress_notifications: false,
+      automation: nil
     )
       @sender = User.find_by(username: sender_username)
       @receivers = User.where(username: receivers)
@@ -90,6 +91,7 @@ def initialize(
       if !@topic_id && !@receivers.present? && !@email_receivers.present?
         raise ArgumentError, "Must specify topic_id or receivers"
       end
+      @automation = automation
     end

     def run!
@@ -153,6 +155,10 @@ def run!
            top_p: @top_p,
            user: Discourse.system_user,
            feature_name: "ai_report",
+            feature_context: {
+              automation_id: @automation&.id,
+              automation_name: @automation&.name,
+            }
          ) do |response|
            print response if Rails.env.development? && @debug_mode
            result << response
diff --git a/lib/completions/endpoints/base.rb b/lib/completions/endpoints/base.rb
index 3782d735d..a0405b42c 100644
--- a/lib/completions/endpoints/base.rb
+++ b/lib/completions/endpoints/base.rb
@@ -56,7 +56,14 @@ def xml_tags_to_strip(dialect)
       []
     end

-    def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
+    def perform_completion!(
+      dialect,
+      user,
+      model_params = {},
+      feature_name: nil,
+      feature_context: nil,
+      &blk
+    )
       allow_tools = dialect.prompt.has_tools?
       model_params = normalize_model_params(model_params)
       orig_blk = blk
@@ -111,6 +118,7 @@ def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &bl
           post_id: dialect.prompt.post_id,
           feature_name: feature_name,
           language_model: llm_model.name,
+          feature_context: feature_context.present? ? feature_context.as_json : nil,
         )

       if !@streaming_mode
diff --git a/lib/completions/endpoints/canned_response.rb b/lib/completions/endpoints/canned_response.rb
index ee0c9e5f0..eaef21da2 100644
--- a/lib/completions/endpoints/canned_response.rb
+++ b/lib/completions/endpoints/canned_response.rb
@@ -23,7 +23,13 @@ def prompt_messages
      dialect.prompt.messages
    end

-    def perform_completion!(dialect, _user, _model_params, feature_name: nil)
+    def perform_completion!(
+      dialect,
+      _user,
+      _model_params,
+      feature_name: nil,
+      feature_context: nil
+    )
      @dialect = dialect
      response = responses[completions]
      if response.nil?
diff --git a/lib/completions/endpoints/fake.rb b/lib/completions/endpoints/fake.rb
index 72d24d574..2beec61a7 100644
--- a/lib/completions/endpoints/fake.rb
+++ b/lib/completions/endpoints/fake.rb
@@ -100,7 +100,13 @@ def self.last_call=(params)
      @last_call = params
    end

-    def perform_completion!(dialect, user, model_params = {}, feature_name: nil)
+    def perform_completion!(
+      dialect,
+      user,
+      model_params = {},
+      feature_name: nil,
+      feature_context: nil
+    )
      self.class.last_call = { dialect: dialect, user: user, model_params: model_params }

      content = self.class.fake_content
diff --git a/lib/completions/endpoints/open_ai.rb b/lib/completions/endpoints/open_ai.rb
index 35b3e724c..a8cff5950 100644
--- a/lib/completions/endpoints/open_ai.rb
+++ b/lib/completions/endpoints/open_ai.rb
@@ -27,7 +27,14 @@ def provider_id
      AiApiAuditLog::Provider::OpenAI
    end

-    def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
+    def perform_completion!(
+      dialect,
+      user,
+      model_params = {},
+      feature_name: nil,
+      feature_context: nil,
+      &blk
+    )
      if dialect.respond_to?(:is_gpt_o?) && dialect.is_gpt_o? && block_given?
        # we need to disable streaming and simulate it
        blk.call "", lambda { |*| }
diff --git a/lib/completions/llm.rb b/lib/completions/llm.rb
index 445bfc199..0d53b4139 100644
--- a/lib/completions/llm.rb
+++ b/lib/completions/llm.rb
@@ -191,6 +191,7 @@ def generate(
       stop_sequences: nil,
       user:,
       feature_name: nil,
+      feature_context: nil,
       &partial_read_blk
     )
       self.class.record_prompt(prompt)
@@ -224,6 +225,7 @@ def generate(
         user,
         model_params,
         feature_name: feature_name,
+        feature_context: feature_context,
         &partial_read_blk
       )
     end
diff --git a/spec/lib/completions/llm_spec.rb b/spec/lib/completions/llm_spec.rb
index e6402b153..2d946e65c 100644
--- a/spec/lib/completions/llm_spec.rb
+++ b/spec/lib/completions/llm_spec.rb
@@ -55,6 +55,44 @@
      expect(log.topic_id).to eq(123)
      expect(log.post_id).to eq(1)
    end
+
+    it "can track feature_name and feature_context" do
+      body = {
+        model: "gpt-3.5-turbo-0301",
+        usage: {
+          prompt_tokens: 337,
+          completion_tokens: 162,
+          total_tokens: 499,
+        },
+        choices: [
+          {
+            message: { role: "assistant", content: "test" },
+            finish_reason: "stop",
+            index: 0,
+          },
+        ],
+      }.to_json
+
+      WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+        status: 200,
+        body: body,
+      )
+
+      result =
+        described_class.proxy("custom:#{model.id}").generate(
+          "Hello",
+          user: user,
+          feature_name: "llm_triage",
+          feature_context: {
+            foo: "bar",
+          },
+        )
+
+      expect(result).to eq("test")
+      log = AiApiAuditLog.order("id desc").first
+      expect(log.feature_name).to eq("llm_triage")
+      expect(log.feature_context).to eq({ "foo" => "bar" })
+    end
  end

  describe "#generate with fake model" do
diff --git a/spec/lib/discourse_automation/llm_triage_spec.rb b/spec/lib/discourse_automation/llm_triage_spec.rb
index 0a8965929..26aea4d44 100644
--- a/spec/lib/discourse_automation/llm_triage_spec.rb
+++ b/spec/lib/discourse_automation/llm_triage_spec.rb
@@ -32,15 +32,31 @@ def add_automation_field(name, value, type: "text")
     add_automation_field("flag_post", true, type: "boolean")
     add_automation_field("canned_reply", "Yo this is a reply")
     add_automation_field("canned_reply_user", reply_user.username, type: "user")
+    add_automation_field("max_post_tokens", 100)
   end

   it "can trigger via automation" do
-    post = Fabricate(:post)
+    post = Fabricate(:post, raw: "hello " * 5000)

-    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
-      automation.running_in_background!
-      automation.trigger!({ "post" => post })
-    end
+    body = {
+      model: "gpt-3.5-turbo-0301",
+      usage: {
+        prompt_tokens: 337,
+        completion_tokens: 162,
+        total_tokens: 499,
+      },
+      choices: [
+        { message: { role: "assistant", content: "bad" }, finish_reason: "stop", index: 0 },
+      ],
+    }.to_json
+
+    WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      body: body,
+    )
+
+    automation.running_in_background!
+    automation.trigger!({ "post" => post })

     topic = post.topic.reload
     expect(topic.category_id).to eq(category.id)
@@ -49,6 +65,18 @@ def add_automation_field(name, value, type: "text")
     reply = topic.posts.order(:post_number).last
     expect(reply.raw).to eq("Yo this is a reply")
     expect(reply.user.id).to eq(reply_user.id)
+
+    ai_log = AiApiAuditLog.order("id desc").first
+    expect(ai_log.feature_name).to eq("llm_triage")
+    expect(ai_log.feature_context).to eq(
+      { "automation_id" => automation.id, "automation_name" => automation.name },
+    )
+
+    count = ai_log.raw_request_payload.scan("hello").size
+    # we could use the exact count here but it can get fragile
+    # as we change tokenizers, this will give us reasonable confidence
+    expect(count).to be <= (100)
+    expect(count).to be > (50)
   end

   it "does not reply to the canned_reply_user" do

From a33b705d2fc5f5e4348a6002287264fd7c10852f Mon Sep 17 00:00:00 2001
From: Sam Saffron
Date: Wed, 23 Oct 2024 15:27:23 +1100
Subject: [PATCH 2/2] lint

---
 app/models/ai_api_audit_log.rb     | 1 -
 discourse_automation/llm_triage.rb | 4 +---
 lib/automation/llm_triage.rb       | 7 ++-----
 lib/automation/report_runner.rb    | 2 +-
 spec/lib/completions/llm_spec.rb   | 6 +-----
 5 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/app/models/ai_api_audit_log.rb b/app/models/ai_api_audit_log.rb
index c2d3c5dd8..2fa9f5c37 100644
--- a/app/models/ai_api_audit_log.rb
+++ b/app/models/ai_api_audit_log.rb
@@ -34,4 +34,3 @@ module Provider
 #  feature_name :string(255)
 #  language_model :string(255)
 #  feature_context :jsonb
-
diff --git a/discourse_automation/llm_triage.rb b/discourse_automation/llm_triage.rb
index 6f03b2290..6ef370793 100644
--- a/discourse_automation/llm_triage.rb
+++ b/discourse_automation/llm_triage.rb
@@ -52,9 +52,7 @@
     flag_type = fields.dig("flag_type", "value")
     max_post_tokens = fields.dig("max_post_tokens", "value").to_i

-    if max_post_tokens <= 0
-      max_post_tokens = nil
-    end
+    max_post_tokens = nil if max_post_tokens <= 0

     begin
       RateLimiter.new(
diff --git a/lib/automation/llm_triage.rb b/lib/automation/llm_triage.rb
index b11350a9f..253a27243 100644
--- a/lib/automation/llm_triage.rb
+++ b/lib/automation/llm_triage.rb
@@ -30,15 +30,12 @@ def self.handle(

     content = "title: #{post.topic.title}\n#{post.raw}"

-    if max_post_tokens.present?
-      content = llm.tokenizer.truncate(content, max_post_tokens)
-    end
+    content = llm.tokenizer.truncate(content, max_post_tokens) if max_post_tokens.present?

     prompt.push(type: :user, content: content)

     result = nil
-
     result =
       llm.generate(
         prompt,
@@ -49,7 +46,7 @@ def self.handle(
         feature_context: {
           automation_id: automation&.id,
           automation_name: automation&.name,
-        }
+        },
       )&.strip

     if result.present? && result.downcase.include?(search_for_text.downcase)
diff --git a/lib/automation/report_runner.rb b/lib/automation/report_runner.rb
index b961aa120..02363a45b 100644
--- a/lib/automation/report_runner.rb
+++ b/lib/automation/report_runner.rb
@@ -158,7 +158,7 @@ def run!
            feature_context: {
              automation_id: @automation&.id,
              automation_name: @automation&.name,
-            }
+            },
          ) do |response|
            print response if Rails.env.development? && @debug_mode
            result << response
diff --git a/spec/lib/completions/llm_spec.rb b/spec/lib/completions/llm_spec.rb
index 2d946e65c..4f22c16fb 100644
--- a/spec/lib/completions/llm_spec.rb
+++ b/spec/lib/completions/llm_spec.rb
@@ -65,11 +65,7 @@
          total_tokens: 499,
        },
        choices: [
-          {
-            message: { role: "assistant", content: "test" },
-            finish_reason: "stop",
-            index: 0,
-          },
+          { message: { role: "assistant", content: "test" }, finish_reason: "stop", index: 0 },
        ],
      }.to_json
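
Note (reviewer sketch, not part of either patch): the snippet below illustrates how the new
feature_context parameter introduced above flows from Llm#generate into AiApiAuditLog,
mirroring the calls used in the specs. The llm_model record, prompt text, and context
values are placeholder assumptions, not code from the patches.

  # Assumed setup: llm_model is an existing LlmModel record; the prompt string and
  # the feature_context values below are illustrative only.
  llm = DiscourseAi::Completions::Llm.proxy("custom:#{llm_model.id}")

  result =
    llm.generate(
      "Is this post spam? Reply with a single word.",
      user: Discourse.system_user,
      feature_name: "llm_triage",
      feature_context: {
        automation_id: 42,
        automation_name: "spam triage",
      },
    )

  # The endpoint stores the context via as_json, so it comes back from the
  # jsonb column with string keys, as the spec expectations show.
  log = AiApiAuditLog.order("id desc").first
  log.feature_context # => { "automation_id" => 42, "automation_name" => "spam triage" }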