Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def show
<head>
<meta charset="UTF-8">
<title>#{ERB::Util.html_escape(name)}</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0, user-scalable=yes, viewport-fit=cover, interactive-widget=resizes-content">
<style>
html, body, iframe {
margin: 0;
Expand Down
3 changes: 3 additions & 0 deletions app/models/llm_model.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def self.provider_params
open_ai: {
organization: :text,
disable_native_tools: :checkbox,
disable_streaming: :checkbox,
},
mistral: {
disable_native_tools: :checkbox,
Expand All @@ -51,11 +52,13 @@ def self.provider_params
ollama: {
disable_system_prompt: :checkbox,
enable_native_tool: :checkbox,
disable_streaming: :checkbox,
},
open_router: {
disable_native_tools: :checkbox,
provider_order: :text,
provider_quantizations: :text,
disable_streaming: :checkbox,
},
}
end
Expand Down
1 change: 1 addition & 0 deletions config/locales/client.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,7 @@ en:
disable_native_tools: "Disable native tool support (use XML based tools)"
provider_order: "Provider order (comma delimited list)"
provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
disable_streaming: "Disable streaming completions (convert streaming to non-streaming requests)"

related_topics:
title: "Related topics"
Expand Down
25 changes: 25 additions & 0 deletions lib/completions/endpoints/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,27 @@ def perform_completion!(
model_params = normalize_model_params(model_params)
orig_blk = blk

if block_given? && disable_streaming?
result =
perform_completion!(
dialect,
user,
model_params,
feature_name: feature_name,
feature_context: feature_context,
partial_tool_calls: partial_tool_calls,
)

result = [result] if !result.is_a?(Array)
cancelled_by_caller = false
cancel_proc = -> { cancelled_by_caller = true }
result.each do |partial|
blk.call(partial, cancel_proc)
break if cancelled_by_caller
end
return result
end

@streaming_mode = block_given?

prompt = dialect.translate
Expand Down Expand Up @@ -261,6 +282,10 @@ def xml_tools_enabled?
raise NotImplementedError
end

# Whether streaming completions are disabled for this model.
#
# `lookup_custom_param` returns nil for existing llm_models that have
# never set the "disable_streaming" provider param; coerce with `!!` so
# the predicate always returns a real boolean and unset means "not
# disabled" — no data migration required.
#
# @return [Boolean] true when streaming should be converted to a
#   non-streaming request
def disable_streaming?
  @disable_streaming = !!llm_model.lookup_custom_param("disable_streaming")
end

private

def start_log(
Expand Down
4 changes: 4 additions & 0 deletions lib/completions/endpoints/open_ai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ def perform_completion!(

private

# Whether streaming completions are disabled for this OpenAI model.
#
# `lookup_custom_param` returns nil when the "disable_streaming"
# provider param was never set (all pre-existing llm_models); the `!!`
# coercion makes the predicate return a real boolean so unset defaults
# to false without a migration.
#
# @return [Boolean] true when streaming should be converted to a
#   non-streaming request
def disable_streaming?
  @disable_streaming = !!llm_model.lookup_custom_param("disable_streaming")
end

def model_uri
if llm_model.url.to_s.starts_with?("srv://")
service = DiscourseAi::Utils::DnsSrv.lookup(llm_model.url.sub("srv://", ""))
Expand Down
37 changes: 37 additions & 0 deletions spec/lib/completions/endpoints/open_ai_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,43 @@ def request_body(prompt, stream: false, tool_call: false)
end
end

# Covers the disable_streaming fallback: even though the caller passes a
# block (which normally triggers streaming), the endpoint should issue a
# single non-streaming HTTP request and deliver the whole completion to
# the block as one chunk.
it "falls back to non-streaming mode when streaming is disabled" do
  # Flip the per-model provider param under test.
  model.update!(provider_params: { disable_streaming: true })

  # Canonical (non-streaming) chat.completion payload the API would return.
  response = {
    id: "chatcmpl-123",
    object: "chat.completion",
    created: 1_677_652_288,
    choices: [
      {
        message: {
          role: "assistant",
          content: "Hello there",
        },
        index: 0,
        finish_reason: "stop",
      },
    ],
  }

  # Capture the request body so we can assert on what was sent upstream.
  parsed_body = nil
  stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
    body:
      proc do |req_body|
        parsed_body = JSON.parse(req_body, symbolize_names: true)
        true
      end,
  ).to_return(status: 200, body: response.to_json)

  chunks = []
  dialect = compliance.dialect(prompt: compliance.generic_prompt)
  # Passing a block would normally enable streaming mode.
  endpoint.perform_completion!(dialect, user) { |chunk| chunks << chunk }

  # The outgoing request must not ask the API to stream.
  expect(parsed_body).not_to have_key(:stream)

  # The full completion arrives as a single chunk via the block.
  expect(chunks).to eq(["Hello there"])
end

describe "when using streaming mode" do
context "with simple prompts" do
it "completes a trivial prompt and logs the response" do
Expand Down
Loading