Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 1dde82e

Browse files
SamSaffron and nattsw authored
FEATURE: allow specifying tool use none in completion prompt
This PR adds support for disabling further tool calls by setting tool_choice to :none across all supported LLM providers: - OpenAI: Uses "none" tool_choice parameter - Anthropic: Uses {type: "none"} and adds a prefill message to prevent confusion - Gemini: Sets function_calling_config mode to "NONE" - AWS Bedrock: Doesn't natively support tool disabling, so adds a prefill message We previously used to disable tool calls by simply removing tool definitions, but this would cause errors with some providers. This implementation uses the supported method appropriate for each provider while providing a fallback for Bedrock. Co-authored-by: Natalie Tay <[email protected]> * remove stray puts * cleaner chain breaker for last tool call (works in thinking) remove unused code * improve test --------- Co-authored-by: Natalie Tay <[email protected]>
1 parent 50e1bc7 commit 1dde82e

File tree

12 files changed

+410
-26
lines changed

12 files changed

+410
-26
lines changed

lib/ai_bot/bot.rb

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@ class Bot
66
attr_reader :model
77

88
BOT_NOT_FOUND = Class.new(StandardError)
9+
910
# the future is agentic, allow for more turns
1011
MAX_COMPLETIONS = 8
12+
1113
# limit is arbitrary, but 5 which was used in the past was too low
1214
MAX_TOOLS = 20
1315

@@ -71,6 +73,8 @@ def get_updated_title(conversation_context, post, user)
7173
end
7274

7375
def force_tool_if_needed(prompt, context)
76+
return if prompt.tool_choice == :none
77+
7478
context[:chosen_tools] ||= []
7579
forced_tools = persona.force_tool_use.map { |tool| tool.name }
7680
force_tool = forced_tools.find { |name| !context[:chosen_tools].include?(name) }
@@ -105,7 +109,7 @@ def reply(context, &update_blk)
105109
needs_newlines = false
106110
tools_ran = 0
107111

108-
while total_completions <= MAX_COMPLETIONS && ongoing_chain
112+
while total_completions < MAX_COMPLETIONS && ongoing_chain
109113
tool_found = false
110114
force_tool_if_needed(prompt, context)
111115

@@ -202,8 +206,8 @@ def reply(context, &update_blk)
202206

203207
total_completions += 1
204208

205-
# do not allow tools when we are at the end of a chain (total_completions == MAX_COMPLETIONS)
206-
prompt.tools = [] if total_completions == MAX_COMPLETIONS
209+
# do not allow tools when we are at the end of a chain (total_completions == MAX_COMPLETIONS - 1)
210+
prompt.tool_choice = :none if total_completions == MAX_COMPLETIONS - 1
207211
end
208212

209213
embed_thinking(raw_context)

lib/completions/dialects/dialect.rb

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,6 @@ def initialize(generic_prompt, llm_model, opts: {})
4646

4747
VALID_ID_REGEX = /\A[a-zA-Z0-9_]+\z/
4848

49-
def can_end_with_assistant_msg?
50-
false
51-
end
52-
5349
def native_tool_support?
5450
false
5551
end
@@ -66,16 +62,58 @@ def tool_choice
6662
prompt.tool_choice
6763
end
6864

65+
def self.no_more_tool_calls_text
66+
# note, Anthropic must never prefill with an ending whitespace
67+
"I WILL NOT USE TOOLS IN THIS REPLY, user expressed they wanted to stop using tool calls.\nHere is the best, complete, answer I can come up with given the information I have."
68+
end
69+
70+
def self.no_more_tool_calls_text_user
71+
"DO NOT USE TOOLS IN YOUR REPLY. Return the best answer you can given the information I supplied you."
72+
end
73+
74+
def no_more_tool_calls_text
75+
self.class.no_more_tool_calls_text
76+
end
77+
78+
def no_more_tool_calls_text_user
79+
self.class.no_more_tool_calls_text_user
80+
end
81+
6982
def translate
70-
messages = prompt.messages
83+
messages = trim_messages(prompt.messages)
84+
last_message = messages.last
85+
inject_done_on_last_tool_call = false
7186

72-
# Some models use an assistant msg to improve long-context responses.
73-
if messages.last[:type] == :model && can_end_with_assistant_msg?
74-
messages = messages.dup
75-
messages.pop
87+
if !native_tool_support? && last_message && last_message[:type].to_sym == :tool &&
88+
prompt.tool_choice == :none
89+
inject_done_on_last_tool_call = true
7690
end
7791

78-
trim_messages(messages).map { |msg| send("#{msg[:type]}_msg", msg) }.compact
92+
translated =
93+
messages
94+
.map do |msg|
95+
case msg[:type].to_sym
96+
when :system
97+
system_msg(msg)
98+
when :user
99+
user_msg(msg)
100+
when :model
101+
model_msg(msg)
102+
when :tool
103+
if inject_done_on_last_tool_call && msg == last_message
104+
tools_dialect.inject_done { tool_msg(msg) }
105+
else
106+
tool_msg(msg)
107+
end
108+
when :tool_call
109+
tool_call_msg(msg)
110+
else
111+
raise ArgumentError, "Unknown message type: #{msg[:type]}"
112+
end
113+
end
114+
.compact
115+
116+
translated
79117
end
80118

81119
def conversation_context

lib/completions/dialects/xml_tools.rb

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,11 @@ def instructions
5454
end
5555
end
5656

57+
DONE_MESSAGE =
58+
"Regardless of what you think, REPLY IMMEDIATELY, WITHOUT MAKING ANY FURTHER TOOL CALLS, YOU ARE OUT OF TOOL CALL QUOTA!"
59+
5760
def from_raw_tool(raw_message)
58-
(<<~TEXT).strip
61+
result = (<<~TEXT).strip
5962
<function_results>
6063
<result>
6164
<tool_name>#{raw_message[:name] || raw_message[:id]}</tool_name>
@@ -65,6 +68,12 @@ def from_raw_tool(raw_message)
6568
</result>
6669
</function_results>
6770
TEXT
71+
72+
if @injecting_done
73+
"#{result}\n\n#{DONE_MESSAGE}"
74+
else
75+
result
76+
end
6877
end
6978

7079
def from_raw_tool_call(raw_message)
@@ -86,6 +95,13 @@ def from_raw_tool_call(raw_message)
8695
TEXT
8796
end
8897

98+
def inject_done(&blk)
99+
@injecting_done = true
100+
blk.call
101+
ensure
102+
@injecting_done = false
103+
end
104+
89105
private
90106

91107
attr_reader :raw_tools

lib/completions/endpoints/anthropic.rb

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,18 @@ def prepare_payload(prompt, model_params, dialect)
9595
if prompt.has_tools?
9696
payload[:tools] = prompt.tools
9797
if dialect.tool_choice.present?
98-
payload[:tool_choice] = { type: "tool", name: dialect.tool_choice }
98+
if dialect.tool_choice == :none
99+
payload[:tool_choice] = { type: "none" }
100+
101+
# prefill prompt to nudge LLM to generate a response that is useful.
102+
# without this LLM (even 3.7) can get confused and start text preambles for a tool calls.
103+
payload[:messages] << {
104+
role: "assistant",
105+
content: dialect.no_more_tool_calls_text,
106+
}
107+
else
108+
payload[:tool_choice] = { type: "tool", name: prompt.tool_choice }
109+
end
99110
end
100111
end
101112

lib/completions/endpoints/aws_bedrock.rb

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,19 @@ def prepare_payload(prompt, model_params, dialect)
122122
if prompt.has_tools?
123123
payload[:tools] = prompt.tools
124124
if dialect.tool_choice.present?
125-
payload[:tool_choice] = { type: "tool", name: dialect.tool_choice }
125+
if dialect.tool_choice == :none
126+
# not supported on bedrock as of 2025-03-24
127+
# retest in 6 months
128+
# payload[:tool_choice] = { type: "none" }
129+
130+
# prefill prompt to nudge LLM to generate a response that is useful, instead of trying to call a tool
131+
payload[:messages] << {
132+
role: "assistant",
133+
content: dialect.no_more_tool_calls_text,
134+
}
135+
else
136+
payload[:tool_choice] = { type: "tool", name: prompt.tool_choice }
137+
end
126138
end
127139
end
128140
elsif dialect.is_a?(DiscourseAi::Completions::Dialects::Nova)

lib/completions/endpoints/gemini.rb

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,14 @@ def prepare_payload(prompt, model_params, dialect)
7272

7373
function_calling_config = { mode: "AUTO" }
7474
if dialect.tool_choice.present?
75-
function_calling_config = {
76-
mode: "ANY",
77-
allowed_function_names: [dialect.tool_choice],
78-
}
75+
if dialect.tool_choice == :none
76+
function_calling_config = { mode: "NONE" }
77+
else
78+
function_calling_config = {
79+
mode: "ANY",
80+
allowed_function_names: [dialect.tool_choice],
81+
}
82+
end
7983
end
8084

8185
payload[:tool_config] = { function_calling_config: function_calling_config }

lib/completions/endpoints/open_ai.rb

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,16 @@ def prepare_payload(prompt, model_params, dialect)
9292
if dialect.tools.present?
9393
payload[:tools] = dialect.tools
9494
if dialect.tool_choice.present?
95-
payload[:tool_choice] = {
96-
type: "function",
97-
function: {
98-
name: dialect.tool_choice,
99-
},
100-
}
95+
if dialect.tool_choice == :none
96+
payload[:tool_choice] = "none"
97+
else
98+
payload[:tool_choice] = {
99+
type: "function",
100+
function: {
101+
name: dialect.tool_choice,
102+
},
103+
}
104+
end
101105
end
102106
end
103107
end

spec/lib/completions/dialects/dialect_spec.rb

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@ def trim(messages)
77
trim_messages(messages)
88
end
99

10+
def system_msg(msg)
11+
msg
12+
end
13+
14+
def user_msg(msg)
15+
msg
16+
end
17+
18+
def model_msg(msg)
19+
msg
20+
end
21+
1022
def tokenizer
1123
DiscourseAi::Tokenizer::OpenAiTokenizer
1224
end
@@ -15,6 +27,57 @@ def tokenizer
1527
RSpec.describe DiscourseAi::Completions::Dialects::Dialect do
1628
fab!(:llm_model)
1729

30+
describe "#translate" do
31+
let(:five_token_msg) { "This represents five tokens." }
32+
let(:tools) do
33+
[
34+
{
35+
name: "echo",
36+
description: "echo a string",
37+
parameters: [
38+
{ name: "text", type: "string", description: "string to echo", required: true },
39+
],
40+
},
41+
]
42+
end
43+
44+
it "injects done message when tool_choice is :none and last message follows tool pattern" do
45+
tool_call_prompt = { name: "echo", arguments: { text: "test message" } }
46+
47+
prompt = DiscourseAi::Completions::Prompt.new("System instructions", tools: tools)
48+
prompt.push(type: :user, content: "echo test message")
49+
prompt.push(type: :tool_call, content: tool_call_prompt.to_json, id: "123", name: "echo")
50+
prompt.push(type: :tool, content: "test message".to_json, name: "echo", id: "123")
51+
prompt.tool_choice = :none
52+
53+
dialect = TestDialect.new(prompt, llm_model)
54+
dialect.max_prompt_tokens = 100 # Set high enough to avoid trimming
55+
56+
translated = dialect.translate
57+
58+
expect(translated).to eq(
59+
[
60+
{ type: :system, content: "System instructions" },
61+
{ type: :user, content: "echo test message" },
62+
{
63+
type: :tool_call,
64+
content:
65+
"<function_calls>\n<invoke>\n<tool_name>echo</tool_name>\n<parameters>\n<text>test message</text>\n</parameters>\n</invoke>\n</function_calls>",
66+
id: "123",
67+
name: "echo",
68+
},
69+
{
70+
type: :tool,
71+
id: "123",
72+
name: "echo",
73+
content:
74+
"<function_results>\n<result>\n<tool_name>echo</tool_name>\n<json>\n\"test message\"\n</json>\n</result>\n</function_results>\n\n#{::DiscourseAi::Completions::Dialects::XmlTools::DONE_MESSAGE}",
75+
},
76+
],
77+
)
78+
end
79+
end
80+
1881
describe "#trim_messages" do
1982
let(:five_token_msg) { "This represents five tokens." }
2083

spec/lib/completions/endpoints/anthropic_spec.rb

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,4 +714,59 @@
714714
expect(parsed_body[:max_tokens]).to eq(500)
715715
end
716716
end
717+
718+
describe "disabled tool use" do
719+
it "can properly disable tool use with :none" do
720+
prompt =
721+
DiscourseAi::Completions::Prompt.new(
722+
"You are a bot",
723+
messages: [type: :user, id: "user1", content: "don't use any tools please"],
724+
tools: [echo_tool],
725+
tool_choice: :none,
726+
)
727+
728+
response_body = {
729+
id: "msg_01RdJkxCbsEj9VFyFYAkfy2S",
730+
type: "message",
731+
role: "assistant",
732+
model: "claude-3-haiku-20240307",
733+
content: [
734+
{ type: "text", text: "I won't use any tools. Here's a direct response instead." },
735+
],
736+
stop_reason: "end_turn",
737+
stop_sequence: nil,
738+
usage: {
739+
input_tokens: 345,
740+
output_tokens: 65,
741+
},
742+
}.to_json
743+
744+
parsed_body = nil
745+
stub_request(:post, url).with(
746+
body:
747+
proc do |req_body|
748+
parsed_body = JSON.parse(req_body, symbolize_names: true)
749+
true
750+
end,
751+
).to_return(status: 200, body: response_body)
752+
753+
result = llm.generate(prompt, user: Discourse.system_user)
754+
755+
# Verify that tool_choice is set to { type: "none" }
756+
expect(parsed_body[:tool_choice]).to eq({ type: "none" })
757+
758+
# Verify that an assistant message with no_more_tool_calls_text was added
759+
messages = parsed_body[:messages]
760+
expect(messages.length).to eq(2) # user message + added assistant message
761+
762+
last_message = messages.last
763+
expect(last_message[:role]).to eq("assistant")
764+
765+
expect(last_message[:content]).to eq(
766+
DiscourseAi::Completions::Dialects::Dialect.no_more_tool_calls_text,
767+
)
768+
769+
expect(result).to eq("I won't use any tools. Here's a direct response instead.")
770+
end
771+
end
717772
end

0 commit comments

Comments
 (0)