
Commit 2063b38

FEATURE: Add Ollama provider (#812)
This allows our users to add the Ollama provider and use it to serve our AI bot (completion/dialect). In this PR, we introduce:

- DiscourseAi::Completions::Dialects::Ollama, which handles prompt translation for Completions::Endpoints::Ollama
- Corrected extract_completion_from and partials_from in Endpoints::Ollama
- Tests for Endpoints::Ollama
- An ollama_model fabricator
1 parent c7eaea4 commit 2063b38

File tree

6 files changed: +206 −13 lines


lib/completions/dialects/dialect.rb

Lines changed: 1 addition & 0 deletions

@@ -15,6 +15,7 @@ def all_dialects
       DiscourseAi::Completions::Dialects::Gemini,
       DiscourseAi::Completions::Dialects::Claude,
       DiscourseAi::Completions::Dialects::Command,
+      DiscourseAi::Completions::Dialects::Ollama,
       DiscourseAi::Completions::Dialects::OpenAiCompatible,
     ]
   end
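Registering the class in all_dialects is what makes the new provider discoverable: each dialect answers can_translate? for its provider name. A minimal sketch of how such a lookup could work, assuming all_dialects is exposed as a class-level method on Dialect (the dialect_for helper below is illustrative only, not part of this commit):

# Hypothetical helper: picks a dialect class by provider name.
def dialect_for(model_provider)
  DiscourseAi::Completions::Dialects::Dialect.all_dialects.find do |dialect|
    dialect.can_translate?(model_provider)
  end
end

dialect_for("ollama") # => DiscourseAi::Completions::Dialects::Ollama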

lib/completions/dialects/ollama.rb

Lines changed: 44 additions & 0 deletions

@@ -0,0 +1,44 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Dialects
      class Ollama < Dialect
        class << self
          def can_translate?(model_provider)
            model_provider == "ollama"
          end
        end

        # TODO: Add tool support

        def max_prompt_tokens
          llm_model.max_prompt_tokens
        end

        private

        def tokenizer
          llm_model.tokenizer_class
        end

        def model_msg(msg)
          { role: "assistant", content: msg[:content] }
        end

        def system_msg(msg)
          { role: "system", content: msg[:content] }
        end

        def user_msg(msg)
          user_message = { role: "user", content: msg[:content] }

          # TODO: Add support for user messages with embedded user ids
          # TODO: Add support for user messages with attachments

          user_message
        end
      end
    end
  end
end
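The dialect itself is deliberately small: it maps the generic prompt roles (system, user, assistant) onto Ollama's chat message format and leaves tool calls, embedded user ids, and attachments as TODOs. For a simple system-plus-user prompt, the translated output is just an ordered array of role/content hashes, as the dialect spec further down asserts; a rough sketch of that shape (the content strings here are placeholders):

[
  { role: "system", content: "<system instructions>" },
  { role: "user", content: "<user input>" },
]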

lib/completions/endpoints/ollama.rb

Lines changed: 4 additions & 13 deletions

@@ -41,7 +41,7 @@ def prepare_payload(prompt, model_params, _dialect)
       default_options
         .merge(model_params)
         .merge(messages: prompt)
-        .tap { |payload| payload[:stream] = true if @streaming_mode }
+        .tap { |payload| payload[:stream] = false if !@streaming_mode }
     end

     def prepare_request(payload)
@@ -51,23 +51,14 @@ def prepare_request(payload)
     end

     def partials_from(decoded_chunk)
-      decoded_chunk
-        .split("\n")
-        .map do |line|
-          data = line.split("data: ", 2)[1]
-          data == "[DONE]" ? nil : data
-        end
-        .compact
+      decoded_chunk.split("\n").compact
     end

     def extract_completion_from(response_raw)
-      parsed = JSON.parse(response_raw, symbolize_names: true).dig(:choices, 0)
-      # half a line sent here
+      parsed = JSON.parse(response_raw, symbolize_names: true)
       return if !parsed

-      response_h = @streaming_mode ? parsed.dig(:delta) : parsed.dig(:message)
-
-      response_h.dig(:content)
+      parsed.dig(:message, :content)
     end
   end
 end
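These endpoint changes mirror Ollama's /api/chat wire format: a non-streaming call returns a single JSON object, while a streaming call returns newline-delimited JSON objects, each carrying the assistant text under message.content (the mocked responses in the endpoint spec below follow this shape). A minimal sketch of the parsing these two methods now perform, using an abbreviated sample body:

require "json"

raw = { model: "llama3.1", message: { role: "assistant", content: "Hello!" }, done: true }.to_json

# Non-streaming: dig the assistant text straight out of message.content.
JSON.parse(raw, symbolize_names: true).dig(:message, :content)
# => "Hello!"

# Streaming: split the newline-delimited chunk, then parse each line the same way.
"#{raw}\n#{raw}".split("\n").map { |line| JSON.parse(line, symbolize_names: true).dig(:message, :content) }
# => ["Hello!", "Hello!"]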

spec/fabricators/llm_model_fabricator.rb

Lines changed: 9 additions & 0 deletions

@@ -79,3 +79,12 @@
   api_key "ABC"
   url "https://api.sambanova.ai/v1/chat/completions"
 end
+
+Fabricator(:ollama_model, from: :llm_model) do
+  display_name "Ollama llama 3.1"
+  name "llama-3.1"
+  provider "ollama"
+  api_key "ABC"
+  tokenizer "DiscourseAi::Tokenizer::Llama3Tokenizer"
+  url "http://api.ollama.ai/api/chat"
+end
Lines changed: 36 additions & 0 deletions

@@ -0,0 +1,36 @@
# frozen_string_literal: true

require_relative "dialect_context"

RSpec.describe DiscourseAi::Completions::Dialects::Ollama do
  fab!(:model) { Fabricate(:ollama_model) }
  let(:context) { DialectContext.new(described_class, model) }

  describe "#translate" do
    it "translates a prompt written in our generic format to the Ollama format" do
      ollama_version = [
        { role: "system", content: context.system_insts },
        { role: "user", content: context.simple_user_input },
      ]

      translated = context.system_user_scenario

      expect(translated).to eq(ollama_version)
    end

    it "trims content if it's getting too long" do
      model.max_prompt_tokens = 5000
      translated = context.long_user_input_scenario

      expect(translated.last[:role]).to eq("user")
      expect(translated.last[:content].length).to be < context.long_message_text.length
    end
  end

  describe "#max_prompt_tokens" do
    it "returns the max_prompt_tokens from the llm_model" do
      model.max_prompt_tokens = 10_000
      expect(context.dialect(nil).max_prompt_tokens).to eq(10_000)
    end
  end
end
Lines changed: 112 additions & 0 deletions

@@ -0,0 +1,112 @@
# frozen_string_literal: true

require_relative "endpoint_compliance"

class OllamaMock < EndpointMock
  def response(content)
    message_content = { content: content }

    {
      created_at: "2024-09-25T06:47:21.283028Z",
      model: "llama3.1",
      message: { role: "assistant" }.merge(message_content),
      done: true,
      done_reason: "stop",
      total_duration: 7_639_718_541,
      load_duration: 299_886_663,
      prompt_eval_count: 18,
      prompt_eval_duration: 220_447_000,
      eval_count: 18,
      eval_duration: 220_447_000,
    }
  end

  def stub_response(prompt, response_text)
    WebMock
      .stub_request(:post, "http://api.ollama.ai/api/chat")
      .with(body: request_body(prompt))
      .to_return(status: 200, body: JSON.dump(response(response_text)))
  end

  def stream_line(delta)
    message_content = { content: delta }

    +{
      model: "llama3.1",
      created_at: "2024-09-25T06:47:21.283028Z",
      message: { role: "assistant" }.merge(message_content),
      done: false,
    }.to_json
  end

  def stub_raw(chunks)
    WebMock.stub_request(:post, "http://api.ollama.ai/api/chat").to_return(
      status: 200,
      body: chunks,
    )
  end

  def stub_streamed_response(prompt, deltas)
    chunks = deltas.each_with_index.map { |_, index| stream_line(deltas[index]) }

    chunks =
      (
        chunks.join("\n\n") << {
          model: "llama3.1",
          created_at: "2024-09-25T06:47:21.283028Z",
          message: {
            role: "assistant",
            content: "",
          },
          done: true,
          done_reason: "stop",
          total_duration: 7_639_718_541,
          load_duration: 299_886_663,
          prompt_eval_count: 18,
          prompt_eval_duration: 220_447_000,
          eval_count: 18,
          eval_duration: 220_447_000,
        }.to_json
      ).split("")

    WebMock
      .stub_request(:post, "http://api.ollama.ai/api/chat")
      .with(body: request_body(prompt, stream: true))
      .to_return(status: 200, body: chunks)

    yield if block_given?
  end

  def request_body(prompt, stream: false)
    model.default_options.merge(messages: prompt).tap { |b| b[:stream] = false if !stream }.to_json
  end
end

RSpec.describe DiscourseAi::Completions::Endpoints::Ollama do
  subject(:endpoint) { described_class.new(model) }

  fab!(:user)
  fab!(:model) { Fabricate(:ollama_model) }

  let(:ollama_mock) { OllamaMock.new(endpoint) }

  let(:compliance) do
    EndpointsCompliance.new(self, endpoint, DiscourseAi::Completions::Dialects::Ollama, user)
  end

  describe "#perform_completion!" do
    context "when using regular mode" do
      it "completes a trivial prompt and logs the response" do
        compliance.regular_mode_simple_prompt(ollama_mock)
      end
    end
  end

  describe "when using streaming mode" do
    context "with simple prompts" do
      it "completes a trivial prompt and logs the response" do
        compliance.streaming_mode_simple_prompt(ollama_mock)
      end
    end
  end
end
