Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit f725ad6

Browse files
committed
Merge branch 'main' into costs-metric
2 parents 02c2508 + e2b0287 commit f725ad6

File tree

9 files changed

+41
-8
lines changed

9 files changed

+41
-8
lines changed

app/controllers/discourse_ai/admin/ai_llms_controller.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ def ai_llm_params(updating: nil)
157157
:provider,
158158
:tokenizer,
159159
:max_prompt_tokens,
160+
:max_output_tokens,
160161
:api_key,
161162
:enabled_chat_bot,
162163
:vision_enabled,

app/models/llm_model.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ class LlmModel < ActiveRecord::Base
1616
validates :input_cost,
1717
:cached_input_cost,
1818
:output_cost,
19+
:max_output_tokens,
1920
numericality: {
2021
greater_than_or_equal_to: 0,
2122
},
@@ -193,4 +194,5 @@ def required_provider_params
193194
# input_cost :float
194195
# cached_input_cost :float
195196
# output_cost :float
197+
# max_output_tokens :integer
196198
#

app/serializers/llm_model_serializer.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ class LlmModelSerializer < ApplicationSerializer
1111
:name,
1212
:provider,
1313
:max_prompt_tokens,
14+
:max_output_tokens,
1415
:tokenizer,
1516
:api_key,
1617
:url,

assets/javascripts/discourse/admin/models/ai-llm.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export default class AiLlm extends RestModel {
1010
"provider",
1111
"tokenizer",
1212
"max_prompt_tokens",
13+
"max_output_tokens",
1314
"url",
1415
"api_key",
1516
"enabled_chat_bot",

assets/javascripts/discourse/components/ai-llm-editor-form.gjs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ export default class AiLlmEditorForm extends Component {
4040

4141
return {
4242
max_prompt_tokens: modelInfo.tokens,
43+
max_output_tokens: modelInfo.max_output_tokens,
4344
tokenizer: info.tokenizer,
4445
url: modelInfo.endpoint || info.endpoint,
4546
display_name: modelInfo.display_name,
@@ -56,6 +57,7 @@ export default class AiLlmEditorForm extends Component {
5657

5758
return {
5859
max_prompt_tokens: model.max_prompt_tokens,
60+
max_output_tokens: model.max_output_tokens,
5961
api_key: model.api_key,
6062
tokenizer: model.tokenizer,
6163
url: model.url,
@@ -185,8 +187,18 @@ export default class AiLlmEditorForm extends Component {
185187
this.isSaving = true;
186188
const isNew = this.args.model.isNew;
187189

190+
const updatedData = {
191+
...data,
192+
};
193+
194+
// If max_output_tokens input is cleared,
195+
// we want the db to store null
196+
if (!data.max_output_tokens) {
197+
updatedData.max_output_tokens = null;
198+
}
199+
188200
try {
189-
await this.args.model.save(data);
201+
await this.args.model.save(updatedData);
190202

191203
if (isNew) {
192204
this.args.llms.addObject(this.args.model);
@@ -397,7 +409,6 @@ export default class AiLlmEditorForm extends Component {
397409
as |field|
398410
>
399411
<field.Input @type="number" step="any" min="0" lang="en" />
400-
401412
</inputGroup.Field>
402413

403414
<inputGroup.Field
@@ -408,7 +419,6 @@ export default class AiLlmEditorForm extends Component {
408419
as |field|
409420
>
410421
<field.Input @type="number" step="any" min="0" lang="en" />
411-
412422
</inputGroup.Field>
413423

414424
<inputGroup.Field
@@ -422,6 +432,16 @@ export default class AiLlmEditorForm extends Component {
422432
</inputGroup.Field>
423433
</form.InputGroup>
424434

435+
<form.Field
436+
@name="max_output_tokens"
437+
@title={{i18n "discourse_ai.llms.max_output_tokens"}}
438+
@tooltip={{i18n "discourse_ai.llms.hints.max_output_tokens"}}
439+
@format="large"
440+
as |field|
441+
>
442+
<field.Input @type="number" step="any" min="0" lang="en" />
443+
</form.Field>
444+
425445
<form.Field
426446
@name="vision_enabled"
427447
@title={{i18n "discourse_ai.llms.vision_enabled"}}

config/locales/client.en.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,8 @@ en:
399399
name: "Model id"
400400
provider: "Provider"
401401
tokenizer: "Tokenizer"
402-
max_prompt_tokens: "Number of tokens for the prompt"
402+
max_prompt_tokens: "Context window"
403+
max_output_tokens: "Max output tokens"
403404
url: "URL of the service hosting the model"
404405
api_key: "API Key of the service hosting the model"
405406
enabled_chat_bot: "Allow AI bot selector"
@@ -486,7 +487,8 @@ en:
486487
failure: "Trying to contact the model returned this error: %{error}"
487488

488489
hints:
489-
max_prompt_tokens: "Max numbers of tokens for the prompt. As a rule of thumb, this should be 50% of the model's context window."
490+
max_prompt_tokens: "The maximum number of tokens the model can process in a single request"
491+
max_output_tokens: "The maximum number of tokens the model can generate in a single request"
490492
display_name: "The name used to reference this model across your site's interface."
491493
name: "We include this in the API call to specify which model we'll use"
492494
vision_enabled: "If enabled, the AI will attempt to understand images. It depends on the model being used supporting vision. Supported by latest models from Anthropic, Google, and OpenAI."
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# frozen_string_literal: true
2+
3+
class AddMaxOutputTokensToLlmModel < ActiveRecord::Migration[7.2]
4+
def change
5+
add_column :llm_models, :max_output_tokens, :integer
6+
end
7+
end

spec/system/ai_helper/ai_composer_helper_spec.rb

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,6 @@
3636
"I like to eat pie. It is a very good dessert. Some people are wasteful by throwing pie at others but I do not do that. I always eat the pie.",
3737
)
3838
end
39-
fab!(:post_2) do
40-
Fabricate(:post, topic: topic, raw: "La lluvia en España se queda principalmente en el avión.")
41-
end
4239

4340
def trigger_composer_helper(content)
4441
visit("/latest")

spec/system/llms/ai_llm_spec.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
form.field("max_prompt_tokens").fill_in(8000)
5454
form.field("provider").select("vllm")
5555
form.field("tokenizer").select("DiscourseAi::Tokenizer::Llama3Tokenizer")
56+
form.field("max_output_tokens").fill_in(2000)
5657
form.field("vision_enabled").toggle
5758
form.field("enabled_chat_bot").toggle
5859
form.submit
@@ -67,6 +68,7 @@
6768
expect(llm.tokenizer).to eq("DiscourseAi::Tokenizer::Llama3Tokenizer")
6869
expect(llm.max_prompt_tokens.to_i).to eq(8000)
6970
expect(llm.provider).to eq("vllm")
71+
expect(llm.max_output_tokens.to_i).to eq(2000)
7072
expect(llm.vision_enabled).to eq(true)
7173
expect(llm.user_id).not_to be_nil
7274
end

0 commit comments

Comments (0)