Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 5d80a34

Browse files
authored
FIX: Add a max token limit based on the text to be translated (#1507)
We're seeing that some LLMs are using 65000+ tokens for raw text that is only 10-1000 characters long. This PR adds a max_tokens limit, passed to the LLM API for each translation, based on the length of the text being translated.
1 parent 8630bc1 commit 5d80a34

File tree

2 files changed

+70
-1
lines changed

2 files changed

+70
-1
lines changed

lib/translation/base_translator.rb

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,24 @@ def get_translation(text:, bot:, translation_user:)
4646
topic: @topic,
4747
post: @post,
4848
)
49+
max_tokens = get_max_tokens(text)
50+
llm_args = { max_tokens: }
4951

5052
result = +""
51-
bot.reply(context) { |partial| result << partial }
53+
bot.reply(context, llm_args:) { |partial| result << partial }
5254
result
5355
end
5456

57+
# Chooses a generation budget (max_tokens) proportional to the size of the
# text being translated, so the LLM cannot run away producing tens of
# thousands of tokens for a short input.
#
# @param text [String] the source text to be translated
# @return [Integer] token cap: 500 for < 100 chars, 1000 for < 500 chars,
#   otherwise twice the character count
def get_max_tokens(text)
  case text.length
  when 0...100 then 500
  when 100...500 then 1_000
  else
    # Long inputs scale linearly; two tokens per character is a generous
    # upper bound for a translation of comparable length.
    text.length * 2
  end
end
66+
5567
def persona_setting
5668
raise NotImplementedError
5769
end

spec/lib/translation/base_translator_spec.rb

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
let(:target_locale) { "de" }
2121
let(:llm_response) { "hur dur hur dur!" }
2222
fab!(:post)
23+
fab!(:topic) { post.topic }
2324

2425
it "creates the correct prompt" do
2526
post_translator =
@@ -36,6 +37,62 @@
3637
end
3738
end
3839

40+
it "creates BotContext with the correct parameters and calls bot.reply with correct args" do
  # Stub out the persona plumbing so we can inspect exactly what the
  # translator hands to BotContext.new and Bot#reply.
  stubbed_context = instance_double(DiscourseAi::Personas::BotContext)
  allow(DiscourseAi::Personas::BotContext).to receive(:new).and_return(stubbed_context)

  bot_double = instance_double(DiscourseAi::Personas::Bot)
  allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
  allow(bot_double).to receive(:reply).and_yield(llm_response)

  translator =
    DiscourseAi::Translation::PostRawTranslator.new(text:, target_locale:, post:, topic:)
  translator.translate

  serialized_payload = { content: text, target_locale: target_locale }.to_json

  expect(DiscourseAi::Personas::BotContext).to have_received(:new).with(
    user: an_instance_of(User),
    skip_tool_details: true,
    feature_name: "translation",
    messages: [{ type: :user, content: serialized_payload }],
    topic: topic,
    post: post,
  )

  expect(DiscourseAi::Personas::Bot).to have_received(:as)
  # Short text (< 100 chars) maps to the 500-token tier.
  expect(bot_double).to have_received(:reply).with(stubbed_context, llm_args: { max_tokens: 500 })
end
67+
68+
it "sets max_tokens correctly based on text length" do
  # One sample per tier: < 100 chars, 100-500 chars, and > 500 chars
  # (where the cap is double the character count: 600 * 2 = 1200).
  [["Short text", 500], ["a" * 200, 1_000], ["a" * 600, 1_200]].each do |sample, expected_cap|
    translator = DiscourseAi::Translation::PostRawTranslator.new(text: sample, target_locale:)

    stubbed_context = instance_double(DiscourseAi::Personas::BotContext)
    allow(DiscourseAi::Personas::BotContext).to receive(:new).and_return(stubbed_context)

    bot_double = instance_double(DiscourseAi::Personas::Bot)
    allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
    allow(bot_double).to receive(:reply).and_yield("translated #{sample[0..10]}")

    translator.translate

    expect(bot_double).to have_received(:reply).with(
      stubbed_context,
      llm_args: {
        max_tokens: expected_cap,
      },
    )
  end
end
95+
3996
it "returns the translation from the llm's response" do
4097
DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do
4198
expect(

0 commit comments

Comments
 (0)