
Commit c5d1b7b

Gemini support for new interface

1 parent 540b6a7

File tree

3 files changed: 143 additions, 47 deletions

  lib/completions/endpoints/cohere.rb
  lib/completions/endpoints/gemini.rb
  spec/lib/completions/endpoints/gemini_spec.rb

lib/completions/endpoints/cohere.rb

Lines changed: 2 additions & 6 deletions

@@ -77,12 +77,8 @@ def extract_completion_from(response_raw)
         end
       end

-      def has_tool?(_ignored)
-        @has_tool
-      end
-
-      def native_tool_support?
-        true
+      def xml_tools_enabled?
+        false
       end

       def add_to_function_buffer(function_buffer, partial: nil, payload: nil)
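
The Cohere change swaps the affirmative native_tool_support? flag for its inverse, xml_tools_enabled?: an endpoint that speaks the provider's structured tool-call API returns false, and XML-formatted tool prompting applies only when it returns true. A minimal standalone sketch of that dispatch, assuming illustrative names (EndpointBase, prepare_tools) that are not part of the plugin:

# Illustrative only: EndpointBase and prepare_tools are assumed names;
# xml_tools_enabled? is the method this commit introduces.
class EndpointBase
  # Default: tools are described to the model as XML in the prompt.
  def xml_tools_enabled?
    true
  end
end

class CohereEndpoint < EndpointBase
  # Cohere returns tool calls as structured JSON, so XML tooling is off.
  def xml_tools_enabled?
    false
  end
end

def prepare_tools(endpoint)
  if endpoint.xml_tools_enabled?
    "inject <function_calls> XML instructions into the prompt"
  else
    "send tool schemas through the provider's native tools parameter"
  end
end

puts prepare_tools(CohereEndpoint.new)
# => send tool schemas through the provider's native tools parameter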

lib/completions/endpoints/gemini.rb

Lines changed: 68 additions & 26 deletions

@@ -111,7 +111,7 @@ def chunk_to_string(chunk)
         chunk.to_s
       end

-      class Decoder
+      class GeminiStreamingDecoder
         def initialize
           @buffer = +""
         end
@@ -151,44 +151,86 @@ def decode(str)
       end

       def decode(chunk)
-        @decoder ||= Decoder.new
-        @decoder.decode(chunk)
+        json = JSON.parse(chunk, symbolize_names: true)
+        idx = -1
+        json.dig(:candidates, 0, :content, :parts).map do |part|
+          if part[:functionCall]
+            idx += 1
+            ToolCall.new(
+              id: "tool_#{idx}",
+              name: part[:functionCall][:name],
+              parameters: part[:functionCall][:args],
+            )
+          else
+            part = part[:text]
+            if part != ""
+              part
+            else
+              nil
+            end
+          end
+        end
       end

-      def extract_prompt_for_tokenizer(prompt)
-        prompt.to_s
-      end
+      def decode_chunk(chunk)
+        @tool_index ||= -1

-      def has_tool?(_response_data)
-        @has_function_call
+        streaming_decoder.decode(chunk).map do |parsed|
+          update_usage(parsed)
+          parsed.dig(:candidates, 0, :content, :parts).map do |part|
+            if part[:text]
+              part = part[:text]
+              if part != ""
+                part
+              else
+                nil
+              end
+            elsif part[:functionCall]
+              @tool_index += 1
+              ToolCall.new(
+                id: "tool_#{@tool_index}",
+                name: part[:functionCall][:name],
+                parameters: part[:functionCall][:args],
+              )
+            end
+          end
+        end.flatten.compact
       end

-      def native_tool_support?
-        true
+      def update_usage(parsed)
+        usage = parsed.dig(:usageMetadata)
+        if usage
+          if prompt_token_count = usage[:promptTokenCount]
+            @prompt_token_count = prompt_token_count
+          end
+          if candidate_token_count = usage[:candidatesTokenCount]
+            @candidate_token_count = candidate_token_count
+          end
+        end
       end

-      def add_to_function_buffer(function_buffer, payload: nil, partial: nil)
-        if @streaming_mode
-          return function_buffer if !partial
-        else
-          partial = payload
+      def final_log_update(log)
+        if @prompt_token_count
+          log.request_tokens = @prompt_token_count
         end

-        function_buffer.at("tool_name").content = partial[:name] if partial[:name].present?
+        if @candidate_token_count
+          log.response_tokens = @candidate_token_count
+        end
+      end

-        if partial[:args]
-          argument_fragments =
-            partial[:args].reduce(+"") do |memo, (arg_name, value)|
-              memo << "\n<#{arg_name}>#{CGI.escapeHTML(value.to_s)}</#{arg_name}>"
-            end
-          argument_fragments << "\n"
+      def streaming_decoder
+        @decoder ||= GeminiStreamingDecoder.new
+      end

-          function_buffer.at("parameters").children =
-            Nokogiri::HTML5::DocumentFragment.parse(argument_fragments)
-        end
+      def extract_prompt_for_tokenizer(prompt)
+        prompt.to_s
+      end

-        function_buffer
+      def xml_tools_enabled?
+        false
       end
+
     end
   end
 end
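
The new decode/decode_chunk pair replaces the XML function buffer: each parsed Gemini response is mapped part by part, with text parts streaming through as plain strings, functionCall parts becoming ToolCall objects with sequential tool_N ids, and empty text parts dropped, while update_usage captures promptTokenCount and candidatesTokenCount for the audit log. A self-contained sketch of the part-mapping, where ToolCall is an illustrative Struct standing in for DiscourseAi::Completions::ToolCall:

# Illustrative stand-in for the plugin's ToolCall class.
ToolCall = Struct.new(:id, :name, :parameters, keyword_init: true)

# Mirrors the mapping in decode/decode_chunk: strings for text parts,
# sequentially numbered tool calls for functionCall parts, nils filtered out.
def parts_to_partials(parsed, tool_index: -1)
  parsed.dig(:candidates, 0, :content, :parts).filter_map do |part|
    if part[:functionCall]
      tool_index += 1
      ToolCall.new(
        id: "tool_#{tool_index}",
        name: part[:functionCall][:name],
        parameters: part[:functionCall][:args],
      )
    elsif part[:text] && part[:text] != ""
      part[:text]
    end
  end
end

parsed = {
  candidates: [
    {
      content: {
        parts: [{ text: "Hello" }, { functionCall: { name: "echo", args: { text: "hi" } } }],
        role: "model",
      },
    },
  ],
}

p parts_to_partials(parsed)
# => ["Hello", #<struct ToolCall id="tool_0", name="echo", parameters={:text=>"hi"}>]

The commit's version keeps a persistent @tool_index across chunks so ids stay stable over a whole streamed response; the sketch threads it as an argument instead.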

spec/lib/completions/endpoints/gemini_spec.rb

Lines changed: 73 additions & 15 deletions

@@ -195,19 +195,16 @@ def tool_response

       response = llm.generate(prompt, user: user)

-      expected = (<<~XML).strip
-        <function_calls>
-        <invoke>
-        <tool_name>echo</tool_name>
-        <parameters>
-        <text>&lt;S&gt;ydney</text>
-        </parameters>
-        <tool_id>tool_0</tool_id>
-        </invoke>
-        </function_calls>
-      XML
-
-      expect(response.strip).to eq(expected)
+      tool =
+        DiscourseAi::Completions::ToolCall.new(
+          id: "tool_0",
+          name: "echo",
+          parameters: {
+            text: "<S>ydney",
+          },
+        )
+
+      expect(response).to eq(tool)
     end

     it "Supports Vision API" do
@@ -265,6 +262,67 @@ def tool_response
       expect(JSON.parse(req_body)).to eq(expected_prompt)
     end

+    it "Can stream tool calls correctly" do
+      rows = [
+        {
+          candidates: [
+            {
+              content: {
+                parts: [{ functionCall: { name: "echo", args: { text: "sam<>wh!s" } } }],
+                role: "model",
+              },
+              safetyRatings: [
+                { category: "HARM_CATEGORY_HATE_SPEECH", probability: "NEGLIGIBLE" },
+                { category: "HARM_CATEGORY_DANGEROUS_CONTENT", probability: "NEGLIGIBLE" },
+                { category: "HARM_CATEGORY_HARASSMENT", probability: "NEGLIGIBLE" },
+                { category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", probability: "NEGLIGIBLE" },
+              ],
+            },
+          ],
+          usageMetadata: {
+            promptTokenCount: 625,
+            totalTokenCount: 625,
+          },
+          modelVersion: "gemini-1.5-pro-002",
+        },
+        {
+          candidates: [{ content: { parts: [{ text: "" }], role: "model" }, finishReason: "STOP" }],
+          usageMetadata: {
+            promptTokenCount: 625,
+            candidatesTokenCount: 4,
+            totalTokenCount: 629,
+          },
+          modelVersion: "gemini-1.5-pro-002",
+        },
+      ]
+
+      payload = rows.map { |r| "data: #{r.to_json}\n\n" }.join
+
+      llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+      url = "#{model.url}:streamGenerateContent?alt=sse&key=123"
+
+      prompt = DiscourseAi::Completions::Prompt.new("Hello", tools: [echo_tool])
+
+      output = []
+
+      stub_request(:post, url).to_return(status: 200, body: payload)
+      llm.generate(prompt, user: user) { |partial| output << partial }
+
+      tool_call = DiscourseAi::Completions::ToolCall.new(
+        id: "tool_0",
+        name: "echo",
+        parameters: {
+          text: "sam<>wh!s",
+        },
+      )
+
+      expect(output).to eq([tool_call])
+
+      log = AiApiAuditLog.order(:id).last
+      expect(log.request_tokens).to eq(625)
+      expect(log.response_tokens).to eq(4)
+    end
+
     it "Can correctly handle streamed responses even if they are chunked badly" do
       data = +""
       data << "da|ta: |"
@@ -279,12 +337,12 @@ def tool_response
       llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
       url = "#{model.url}:streamGenerateContent?alt=sse&key=123"

-      output = +""
+      output = []
       gemini_mock.with_chunk_array_support do
         stub_request(:post, url).to_return(status: 200, body: split)
         llm.generate("Hello", user: user) { |partial| output << partial }
       end

-      expect(output).to eq("Hello World Sam")
+      expect(output.join).to eq("Hello World Sam")
     end
   end
