Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions lib/ai_bot/bot.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ class Bot
attr_reader :model

# Error class for bot lookup failures; namespaced so callers can rescue it
# specifically. NOTE(review): the raising site is outside this view — confirm.
BOT_NOT_FOUND = Class.new(StandardError)

# Upper bound on LLM completion rounds in a single reply chain.
# the future is agentic, allow for more turns
MAX_COMPLETIONS = 8

# Upper bound on tool invocations per reply.
# limit is arbitrary, but 5 which was used in the past was too low
MAX_TOOLS = 20

Expand Down Expand Up @@ -71,6 +73,8 @@ def get_updated_title(conversation_context, post, user)
end

def force_tool_if_needed(prompt, context)
return if prompt.tool_choice == :none

context[:chosen_tools] ||= []
forced_tools = persona.force_tool_use.map { |tool| tool.name }
force_tool = forced_tools.find { |name| !context[:chosen_tools].include?(name) }
Expand Down Expand Up @@ -105,7 +109,7 @@ def reply(context, &update_blk)
needs_newlines = false
tools_ran = 0

while total_completions <= MAX_COMPLETIONS && ongoing_chain
while total_completions < MAX_COMPLETIONS && ongoing_chain
tool_found = false
force_tool_if_needed(prompt, context)

Expand Down Expand Up @@ -202,8 +206,8 @@ def reply(context, &update_blk)

total_completions += 1

# do not allow tools when we are at the end of a chain (total_completions == MAX_COMPLETIONS)
prompt.tools = [] if total_completions == MAX_COMPLETIONS
# do not allow tools when we are at the end of a chain (total_completions == MAX_COMPLETIONS - 1)
prompt.tool_choice = :none if total_completions == MAX_COMPLETIONS - 1
end

embed_thinking(raw_context)
Expand Down
58 changes: 48 additions & 10 deletions lib/completions/dialects/dialect.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,6 @@ def initialize(generic_prompt, llm_model, opts: {})

VALID_ID_REGEX = /\A[a-zA-Z0-9_]+\z/

def can_end_with_assistant_msg?
false
end

def native_tool_support?
false
end
Expand All @@ -66,16 +62,58 @@ def tool_choice
prompt.tool_choice
end

# Assistant-role prefill text appended when tool use is disabled
# (tool_choice == :none) to nudge the model into answering directly
# instead of emitting a tool-call preamble.
def self.no_more_tool_calls_text
  # note, Anthropic must never prefill with an ending whitespace
  "I WILL NOT USE TOOLS IN THIS REPLY, user expressed they wanted to stop using tool calls.\nHere is the best, complete, answer I can come up with given the information I have."
end

# User-role variant of the "stop using tools" instruction, for providers
# where the nudge is delivered as a user message rather than a prefill.
def self.no_more_tool_calls_text_user
  "DO NOT USE TOOLS IN YOUR REPLY. Return the best answer you can given the information I supplied you."
end

# Instance-level convenience delegator to the class-level prefill text.
def no_more_tool_calls_text
  self.class.no_more_tool_calls_text
end

# Instance-level convenience delegator to the class-level user-role text.
def no_more_tool_calls_text_user
  self.class.no_more_tool_calls_text_user
end

# Converts the generic prompt messages into this dialect's wire format.
#
# Each message hash is dispatched by its :type (:system, :user, :model,
# :tool, :tool_call) to the matching *_msg translator; nil translations are
# dropped. When the dialect emulates tools (no native tool support) and the
# caller set tool_choice to :none, the trailing tool result is wrapped in
# tools_dialect.inject_done so a "stop calling tools" marker is appended.
#
# @return [Array] translated messages, nils removed
# @raise [ArgumentError] on an unrecognized message type
def translate
  messages = trim_messages(prompt.messages)
  final_msg = messages.last

  # Decide up-front whether the last tool result needs the "done" marker.
  append_done_marker =
    !native_tool_support? && final_msg && final_msg[:type].to_sym == :tool &&
      prompt.tool_choice == :none

  translated =
    messages.map do |msg|
      case msg[:type].to_sym
      when :system
        system_msg(msg)
      when :user
        user_msg(msg)
      when :model
        model_msg(msg)
      when :tool
        if append_done_marker && msg == final_msg
          tools_dialect.inject_done { tool_msg(msg) }
        else
          tool_msg(msg)
        end
      when :tool_call
        tool_call_msg(msg)
      else
        raise ArgumentError, "Unknown message type: #{msg[:type]}"
      end
    end

  translated.compact
end

def conversation_context
Expand Down
18 changes: 17 additions & 1 deletion lib/completions/dialects/xml_tools.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,11 @@ def instructions
end
end

# Marker appended to the final emulated (XML) tool result when the caller has
# disabled further tool calls; XML tool emulation has no API-level
# tool_choice, so the instruction must travel in-band. Frozen so the shared
# constant cannot be mutated by callers.
DONE_MESSAGE =
  "Regardless of what you think, REPLY IMMEDIATELY, WITHOUT MAKING ANY FURTHER TOOL CALLS, YOU ARE OUT OF TOOL CALL QUOTA!".freeze

def from_raw_tool(raw_message)
(<<~TEXT).strip
result = (<<~TEXT).strip
<function_results>
<result>
<tool_name>#{raw_message[:name] || raw_message[:id]}</tool_name>
Expand All @@ -65,6 +68,12 @@ def from_raw_tool(raw_message)
</result>
</function_results>
TEXT

if @injecting_done
"#{result}\n\n#{DONE_MESSAGE}"
else
result
end
end

def from_raw_tool_call(raw_message)
Expand All @@ -86,6 +95,13 @@ def from_raw_tool_call(raw_message)
TEXT
end

# Runs the given block with the done-marker flag enabled, so from_raw_tool
# appends DONE_MESSAGE while the block executes. The ensure clause guarantees
# the flag is cleared even if the block raises.
def inject_done
  @injecting_done = true
  yield
ensure
  @injecting_done = false
end

private

attr_reader :raw_tools
Expand Down
13 changes: 12 additions & 1 deletion lib/completions/endpoints/anthropic.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,18 @@ def prepare_payload(prompt, model_params, dialect)
if prompt.has_tools?
payload[:tools] = prompt.tools
if dialect.tool_choice.present?
payload[:tool_choice] = { type: "tool", name: dialect.tool_choice }
if dialect.tool_choice == :none
payload[:tool_choice] = { type: "none" }

# prefill prompt to nudge LLM to generate a response that is useful.
# without this LLM (even 3.7) can get confused and start text preambles for a tool calls.
payload[:messages] << {
role: "assistant",
content: dialect.no_more_tool_calls_text,
}
else
payload[:tool_choice] = { type: "tool", name: prompt.tool_choice }
end
end
end

Expand Down
14 changes: 13 additions & 1 deletion lib/completions/endpoints/aws_bedrock.rb
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,19 @@ def prepare_payload(prompt, model_params, dialect)
if prompt.has_tools?
payload[:tools] = prompt.tools
if dialect.tool_choice.present?
payload[:tool_choice] = { type: "tool", name: dialect.tool_choice }
if dialect.tool_choice == :none
# not supported on bedrock as of 2025-03-24
# retest in 6 months
# payload[:tool_choice] = { type: "none" }

# prefill prompt to nudge LLM to generate a response that is useful, instead of trying to call a tool
payload[:messages] << {
role: "assistant",
content: dialect.no_more_tool_calls_text,
}
else
payload[:tool_choice] = { type: "tool", name: prompt.tool_choice }
end
end
end
elsif dialect.is_a?(DiscourseAi::Completions::Dialects::Nova)
Expand Down
12 changes: 8 additions & 4 deletions lib/completions/endpoints/gemini.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,14 @@ def prepare_payload(prompt, model_params, dialect)

function_calling_config = { mode: "AUTO" }
if dialect.tool_choice.present?
function_calling_config = {
mode: "ANY",
allowed_function_names: [dialect.tool_choice],
}
if dialect.tool_choice == :none
function_calling_config = { mode: "NONE" }
else
function_calling_config = {
mode: "ANY",
allowed_function_names: [dialect.tool_choice],
}
end
end

payload[:tool_config] = { function_calling_config: function_calling_config }
Expand Down
16 changes: 10 additions & 6 deletions lib/completions/endpoints/open_ai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,16 @@ def prepare_payload(prompt, model_params, dialect)
if dialect.tools.present?
payload[:tools] = dialect.tools
if dialect.tool_choice.present?
payload[:tool_choice] = {
type: "function",
function: {
name: dialect.tool_choice,
},
}
if dialect.tool_choice == :none
payload[:tool_choice] = "none"
else
payload[:tool_choice] = {
type: "function",
function: {
name: dialect.tool_choice,
},
}
end
end
end
end
Expand Down
63 changes: 63 additions & 0 deletions spec/lib/completions/dialects/dialect_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@ def trim(messages)
trim_messages(messages)
end

# Spec stub: system messages pass through untouched so expectations can
# compare the raw hashes directly.
def system_msg(message)
  message
end

# Spec stub: user messages pass through untouched so expectations can
# compare the raw hashes directly.
def user_msg(message)
  message
end

# Spec stub: model messages pass through untouched so expectations can
# compare the raw hashes directly.
def model_msg(message)
  message
end

def tokenizer
DiscourseAi::Tokenizer::OpenAiTokenizer
end
Expand All @@ -15,6 +27,57 @@ def tokenizer
RSpec.describe DiscourseAi::Completions::Dialects::Dialect do
fab!(:llm_model)

describe "#translate" do
  let(:five_token_msg) { "This represents five tokens." }
  let(:tools) do
    [
      {
        name: "echo",
        description: "echo a string",
        parameters: [
          { name: "text", type: "string", description: "string to echo", required: true },
        ],
      },
    ]
  end

  # Exercises the non-native-tool path: with tool_choice :none and a trailing
  # tool result, translate must append XmlTools::DONE_MESSAGE to that result.
  it "injects done message when tool_choice is :none and last message follows tool pattern" do
    tool_call_prompt = { name: "echo", arguments: { text: "test message" } }

    prompt = DiscourseAi::Completions::Prompt.new("System instructions", tools: tools)
    prompt.push(type: :user, content: "echo test message")
    prompt.push(type: :tool_call, content: tool_call_prompt.to_json, id: "123", name: "echo")
    prompt.push(type: :tool, content: "test message".to_json, name: "echo", id: "123")
    prompt.tool_choice = :none

    dialect = TestDialect.new(prompt, llm_model)
    dialect.max_prompt_tokens = 100 # Set high enough to avoid trimming

    translated = dialect.translate

    # Expect the full conversation verbatim, with the done marker appended
    # after the final <function_results> block.
    expect(translated).to eq(
      [
        { type: :system, content: "System instructions" },
        { type: :user, content: "echo test message" },
        {
          type: :tool_call,
          content:
            "<function_calls>\n<invoke>\n<tool_name>echo</tool_name>\n<parameters>\n<text>test message</text>\n</parameters>\n</invoke>\n</function_calls>",
          id: "123",
          name: "echo",
        },
        {
          type: :tool,
          id: "123",
          name: "echo",
          content:
            "<function_results>\n<result>\n<tool_name>echo</tool_name>\n<json>\n\"test message\"\n</json>\n</result>\n</function_results>\n\n#{::DiscourseAi::Completions::Dialects::XmlTools::DONE_MESSAGE}",
        },
      ],
    )
  end
end

describe "#trim_messages" do
let(:five_token_msg) { "This represents five tokens." }

Expand Down
55 changes: 55 additions & 0 deletions spec/lib/completions/endpoints/anthropic_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -714,4 +714,59 @@
expect(parsed_body[:max_tokens]).to eq(500)
end
end

describe "disabled tool use" do
  # Verifies the Anthropic payload when tool_choice is :none: the request must
  # carry tool_choice { type: "none" } plus an assistant prefill message, and
  # the response text must round-trip unchanged.
  it "can properly disable tool use with :none" do
    prompt =
      DiscourseAi::Completions::Prompt.new(
        "You are a bot",
        messages: [type: :user, id: "user1", content: "don't use any tools please"],
        tools: [echo_tool],
        tool_choice: :none,
      )

    # Canned Anthropic messages-API response (no tool_use content blocks).
    response_body = {
      id: "msg_01RdJkxCbsEj9VFyFYAkfy2S",
      type: "message",
      role: "assistant",
      model: "claude-3-haiku-20240307",
      content: [
        { type: "text", text: "I won't use any tools. Here's a direct response instead." },
      ],
      stop_reason: "end_turn",
      stop_sequence: nil,
      usage: {
        input_tokens: 345,
        output_tokens: 65,
      },
    }.to_json

    # Capture the outbound request body so the payload shape can be asserted.
    parsed_body = nil
    stub_request(:post, url).with(
      body:
        proc do |req_body|
          parsed_body = JSON.parse(req_body, symbolize_names: true)
          true
        end,
    ).to_return(status: 200, body: response_body)

    result = llm.generate(prompt, user: Discourse.system_user)

    # Verify that tool_choice is set to { type: "none" }
    expect(parsed_body[:tool_choice]).to eq({ type: "none" })

    # Verify that an assistant message with no_more_tool_calls_text was added
    messages = parsed_body[:messages]
    expect(messages.length).to eq(2) # user message + added assistant message

    last_message = messages.last
    expect(last_message[:role]).to eq("assistant")

    expect(last_message[:content]).to eq(
      DiscourseAi::Completions::Dialects::Dialect.no_more_tool_calls_text,
    )

    expect(result).to eq("I won't use any tools. Here's a direct response instead.")
  end
end
end
Loading