Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 20612fd

Browse files
authored
FEATURE: add the ability to disable streaming on an OpenAI LLM
Disabling streaming is required for models such as o1 that do not have streaming enabled yet. It is good to carry this feature around in case various APIs decide not to support streaming endpoints, so Discourse AI can continue to work just as it did before. Also fixes an issue where shared artifacts were missing the viewport meta tag, leading to tiny artifacts on mobile.
1 parent 7e9c0dc commit 20612fd

File tree

6 files changed

+71
-0
lines changed

6 files changed

+71
-0
lines changed

app/controllers/discourse_ai/ai_bot/artifacts_controller.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def show
5757
<head>
5858
<meta charset="UTF-8">
5959
<title>#{ERB::Util.html_escape(name)}</title>
60+
<meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0, user-scalable=yes, viewport-fit=cover, interactive-widget=resizes-content">
6061
<style>
6162
html, body, iframe {
6263
margin: 0;

app/models/llm_model.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def self.provider_params
3232
open_ai: {
3333
organization: :text,
3434
disable_native_tools: :checkbox,
35+
disable_streaming: :checkbox,
3536
},
3637
mistral: {
3738
disable_native_tools: :checkbox,
@@ -51,11 +52,13 @@ def self.provider_params
5152
ollama: {
5253
disable_system_prompt: :checkbox,
5354
enable_native_tool: :checkbox,
55+
disable_streaming: :checkbox,
5456
},
5557
open_router: {
5658
disable_native_tools: :checkbox,
5759
provider_order: :text,
5860
provider_quantizations: :text,
61+
disable_streaming: :checkbox,
5962
},
6063
}
6164
end

config/locales/client.en.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,7 @@ en:
420420
disable_native_tools: "Disable native tool support (use XML based tools)"
421421
provider_order: "Provider order (comma delimited list)"
422422
provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
423+
disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
423424

424425
related_topics:
425426
title: "Related topics"

lib/completions/endpoints/base.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,27 @@ def perform_completion!(
6969
model_params = normalize_model_params(model_params)
7070
orig_blk = blk
7171

72+
if block_given? && disable_streaming?
73+
result =
74+
perform_completion!(
75+
dialect,
76+
user,
77+
model_params,
78+
feature_name: feature_name,
79+
feature_context: feature_context,
80+
partial_tool_calls: partial_tool_calls,
81+
)
82+
83+
result = [result] if !result.is_a?(Array)
84+
cancelled_by_caller = false
85+
cancel_proc = -> { cancelled_by_caller = true }
86+
result.each do |partial|
87+
blk.call(partial, cancel_proc)
88+
break if cancelled_by_caller
89+
end
90+
return result
91+
end
92+
7293
@streaming_mode = block_given?
7394

7495
prompt = dialect.translate
@@ -261,6 +282,10 @@ def xml_tools_enabled?
261282
raise NotImplementedError
262283
end
263284

285+
def disable_streaming?
286+
@disable_streaming = !!llm_model.lookup_custom_param("disable_streaming")
287+
end
288+
264289
private
265290

266291
def start_log(

lib/completions/endpoints/open_ai.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ def perform_completion!(
4242

4343
private
4444

45+
def disable_streaming?
46+
@disable_streaming = llm_model.lookup_custom_param("disable_streaming")
47+
end
48+
4549
def model_uri
4650
if llm_model.url.to_s.starts_with?("srv://")
4751
service = DiscourseAi::Utils::DnsSrv.lookup(llm_model.url.sub("srv://", ""))

spec/lib/completions/endpoints/open_ai_spec.rb

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,43 @@ def request_body(prompt, stream: false, tool_call: false)
457457
end
458458
end
459459

460+
it "falls back to non-streaming mode when streaming is disabled" do
461+
model.update!(provider_params: { disable_streaming: true })
462+
463+
response = {
464+
id: "chatcmpl-123",
465+
object: "chat.completion",
466+
created: 1_677_652_288,
467+
choices: [
468+
{
469+
message: {
470+
role: "assistant",
471+
content: "Hello there",
472+
},
473+
index: 0,
474+
finish_reason: "stop",
475+
},
476+
],
477+
}
478+
479+
parsed_body = nil
480+
stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
481+
body:
482+
proc do |req_body|
483+
parsed_body = JSON.parse(req_body, symbolize_names: true)
484+
true
485+
end,
486+
).to_return(status: 200, body: response.to_json)
487+
488+
chunks = []
489+
dialect = compliance.dialect(prompt: compliance.generic_prompt)
490+
endpoint.perform_completion!(dialect, user) { |chunk| chunks << chunk }
491+
492+
expect(parsed_body).not_to have_key(:stream)
493+
494+
expect(chunks).to eq(["Hello there"])
495+
end
496+
460497
describe "when using streaming mode" do
461498
context "with simple prompts" do
462499
it "completes a trivial prompt and logs the response" do

0 commit comments

Comments
 (0)