2 changes: 2 additions & 0 deletions app/models/llm_model.rb
@@ -65,6 +65,8 @@ def self.provider_params
       google: {
         disable_native_tools: :checkbox,
         enable_thinking: :checkbox,
+        disable_temperature: :checkbox,
+        disable_top_p: :checkbox,
         thinking_tokens: :number,
       },
       azure: {
42 changes: 39 additions & 3 deletions config/eval-llms.yml
@@ -1,4 +1,27 @@
 llms:
+  o3:
+    display_name: O3
+    name: o3
+    tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer
+    api_key_env: OPENAI_API_KEY
+    provider: open_ai
+    url: https://api.openai.com/v1/chat/completions
+    max_prompt_tokens: 131072
+    vision_enabled: true
+    provider_params:
+      disable_top_p: true
+      disable_temperature: true
+
+  gpt-41:
+    display_name: GPT-4.1
+    name: gpt-4.1
+    tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer
+    api_key_env: OPENAI_API_KEY
+    provider: open_ai
+    url: https://api.openai.com/v1/chat/completions
+    max_prompt_tokens: 131072
+    vision_enabled: true
+
   gpt-4o:
     display_name: GPT-4o
     name: gpt-4o
@@ -74,12 +97,25 @@ llms:
     max_prompt_tokens: 1000000
     vision_enabled: true

-  gemini-2.0-pro-exp:
+  gemini-2.5-flash:
+    display_name: Gemini 2.5 Flash
+    name: gemini-2-5-flash
+    tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer
+    api_key_env: GEMINI_API_KEY
+    provider: google
+    url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash
+    max_prompt_tokens: 1000000
+    vision_enabled: true
+    provider_params:
+      disable_top_p: true
+      disable_temperature: true
+
+  gemini-2.0-pro:
     display_name: Gemini 2.0 pro
-    name: gemini-2-0-pro-exp
+    name: gemini-2-0-pro
     tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer
     api_key_env: GEMINI_API_KEY
     provider: google
-    url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp
+    url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro
     max_prompt_tokens: 1000000
     vision_enabled: true
3 changes: 2 additions & 1 deletion config/locales/client.en.yml
@@ -249,6 +249,7 @@ en:
           markdown_tables: "Generate Markdown table"
           custom_prompt: "Custom prompt"
           image_caption: "Caption images"
+          translator: "Translator"

         translation:
           name: "Translation"
@@ -257,7 +258,7 @@ en:
           post_raw_translator: "Post raw translator"
           topic_title_translator: "Topic title translator"
           short_text_translator: "Short text translator"

         spam:
           name: "Spam"
           description: "Identifies potential spam using the selected LLM and flags it for site moderators to inspect in the review queue"
7 changes: 1 addition & 6 deletions evals/lib/eval.rb
@@ -200,12 +200,7 @@ class << user
       user.admin = true
     end
     result =
-      helper.generate_and_send_prompt(
-        name,
-        input,
-        current_user = user,
-        _force_default_locale = false,
-      )
+      helper.generate_and_send_prompt(name, input, current_user = user, force_default_locale: false)

     result[:suggestions].first
   end
39 changes: 26 additions & 13 deletions lib/ai_helper/assistant.rb
@@ -82,7 +82,7 @@ def attach_user_context(context, user = nil, force_default_locale: false)
     context.user_language = "#{locale_hash["name"]}"

     if user
-      timezone = user.user_option.timezone || "UTC"
+      timezone = user&.user_option&.timezone || "UTC"
       current_time = Time.now.in_time_zone(timezone)

       temporal_context = {
@@ -126,21 +126,29 @@ def generate_prompt(
     )
     context = attach_user_context(context, user, force_default_locale: force_default_locale)

-    helper_response = +""
     bad_json = false
+    json_summary_schema_key = bot.persona.response_format&.first.to_h
+
+    schema_key = json_summary_schema_key["key"]&.to_sym
+    schema_type = json_summary_schema_key["type"]
+
+    if schema_type == "array"
+      helper_response = []
+    else
+      helper_response = +""
+    end

     buffer_blk =
       Proc.new do |partial, _, type|
-        json_summary_schema_key = bot.persona.response_format&.first.to_h
-        helper_response = [] if json_summary_schema_key["type"] == "array"
-        if type == :structured_output
-          helper_chunk = partial.read_buffered_property(json_summary_schema_key["key"]&.to_sym)
+        if type == :structured_output && schema_type
+          helper_chunk = partial.read_buffered_property(schema_key)
           if !helper_chunk.nil? && !helper_chunk.empty?
-            if json_summary_schema_key["type"] != "array"
-              helper_response = helper_chunk
-            else
+            if schema_type == "string" || schema_type == "array"
               helper_response << helper_chunk
+            else
+              helper_response = helper_chunk
             end
-            block.call(helper_chunk) if block
+            block.call(helper_chunk) if block && !bad_json
           end
         elsif type.blank?
           # Assume response is a regular completion.
@@ -255,7 +263,7 @@ def generate_image_caption(upload, user)
       Proc.new do |partial, _, type|
         if type == :structured_output
           structured_output = partial
-          json_summary_schema_key = bot.persona.response_format&.first.to_h
+          bot.persona.response_format&.first.to_h
         end
       end

@@ -287,6 +295,11 @@ def build_bot(helper_mode, user)
   end

   def find_ai_helper_model(helper_mode, persona_klass)
+    if helper_mode == IMAGE_CAPTION && @image_caption_llm.is_a?(LlmModel)
+      return @image_caption_llm
+    end
+
+    return @helper_llm if helper_mode != IMAGE_CAPTION && @helper_llm.is_a?(LlmModel)
     self.class.find_ai_helper_model(helper_mode, persona_klass)
   end

@@ -299,9 +312,9 @@ def self.find_ai_helper_model(helper_mode, persona_klass)

     if !model_id
       if helper_mode == IMAGE_CAPTION
-        model_id = @helper_llm || SiteSetting.ai_helper_image_caption_model&.split(":")&.last
+        model_id = SiteSetting.ai_helper_image_caption_model&.split(":")&.last
       else
-        model_id = @image_caption_llm || SiteSetting.ai_helper_model&.split(":")&.last
+        model_id = SiteSetting.ai_helper_model&.split(":")&.last
       end
     end

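The accumulation rule introduced in generate_prompt is easiest to see in isolation: string and array schemas build the response out of streamed fragments, while any other schema type keeps only the latest complete value. A minimal sketch of that rule, with hypothetical chunk values:

schema_type = "string" # would come from the persona's response_format

# Mirrors the initialization and accumulation logic in generate_prompt.
helper_response = schema_type == "array" ? [] : +""

["Bon", "jour"].each do |helper_chunk|
  if schema_type == "string" || schema_type == "array"
    helper_response << helper_chunk # accumulate streamed fragments
  else
    helper_response = helper_chunk # keep only the latest complete value
  end
end

helper_response # => "Bonjour"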
14 changes: 12 additions & 2 deletions lib/completions/endpoints/base.rb
@@ -187,10 +187,10 @@ def perform_completion!(
         blk =
           lambda do |partial|
             if partial.is_a?(String)
-              partial = xml_stripper << partial if xml_stripper
+              partial = xml_stripper << partial if xml_stripper && !partial.empty?

               if structured_output.present?
-                structured_output << partial
+                structured_output << partial if !partial.empty?
                 partial = structured_output
               end
             end
@@ -252,6 +252,15 @@ end
         end
         xml_tool_processor.finish.each { |partial| blk.call(partial) } if xml_tool_processor
         decode_chunk_finish.each { |partial| blk.call(partial) }
+
+        if structured_output
+          structured_output.finish
+          if structured_output.broken?
+            # signal last partial output which will get parsed
+            # by best effort json parser
+            blk.call("")
+          end
+        end
         return response_data
       ensure
         if log
@@ -448,6 +457,7 @@ def non_streaming_response(

       if structured_output.present?
         response_data.each { |data| structured_output << data if data.is_a?(String) }
+        structured_output.finish

         return structured_output
       end
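The empty blk.call("") is the subtle part: inside the streaming lambda, a String partial is replaced by the structured_output object whenever one is present, so an empty chunk appends nothing but still hands the consumer the object one last time, now that finish has been called and a best-effort parse is possible. A self-contained toy of that hand-off (names are illustrative, not the plugin's API):

# Stand-in for StructuredOutput: buffers chunks, exposes a final read.
class ToyStructuredOutput
  def initialize
    @raw = +""
  end

  def <<(chunk)
    @raw << chunk
  end

  # Stand-in for read_buffered_property / the best-effort parse.
  def read
    @raw
  end
end

structured_output = ToyStructuredOutput.new

blk =
  lambda do |partial|
    if partial.is_a?(String)
      structured_output << partial unless partial.empty?
      partial = structured_output # the consumer always receives the wrapper
    end
    puts partial.read
  end

blk.call("{\"output\": \"hi\"}") # prints what is buffered so far
blk.call("")                     # flush: nothing appended, one final read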
3 changes: 2 additions & 1 deletion lib/completions/endpoints/gemini.rb
@@ -33,7 +33,8 @@ def normalize_model_params(model_params)

     model_params[:topP] = model_params.delete(:top_p) if model_params[:top_p]

-    # temperature already supported
+    model_params.delete(:temperature) if llm_model.lookup_custom_param("disable_temperature")
+    model_params.delete(:topP) if llm_model.lookup_custom_param("disable_top_p")

     model_params
   end
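A standalone sketch of the resulting Gemini behavior (not the plugin code itself; the keyword flags stand in for the lookup_custom_param reads):

def normalize_gemini_params(model_params, disable_temperature:, disable_top_p:)
  model_params = model_params.dup

  # Gemini expects camelCase topP.
  model_params[:topP] = model_params.delete(:top_p) if model_params[:top_p]

  # The new per-model checkboxes strip the sampling params entirely.
  model_params.delete(:temperature) if disable_temperature
  model_params.delete(:topP) if disable_top_p

  model_params
end

normalize_gemini_params(
  { top_p: 0.95, temperature: 0.7 },
  disable_temperature: true,
  disable_top_p: true,
)
# => {}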
1 change: 1 addition & 0 deletions lib/completions/json_streaming_parser.rb
@@ -53,6 +53,7 @@ def initialize
   #
   # Returns a UTF-8 encoded String.
   def <<(data)
+    data = data.dup if data.frozen?
     # Avoid state machine for complete UTF-8.
     if @buffer.empty?
       data.force_encoding(Encoding::UTF_8)
37 changes: 31 additions & 6 deletions lib/completions/structured_output.rb
@@ -17,23 +17,48 @@ def initialize(json_schema_properties)
     @raw_cursor = 0

     @partial_json_tracker = JsonStreamingTracker.new(self)
+
+    @type_map = {}
+    json_schema_properties.each { |name, prop| @type_map[name.to_sym] = prop[:type].to_sym }
+
+    @done = false
   end

   def to_s
+    # we may want to also normalize the JSON here for the broken case
     @raw_response
   end

   attr_reader :last_chunk_buffer

   def <<(raw)
+    raise "Cannot append to a completed StructuredOutput" if @done
     @raw_response << raw
     @partial_json_tracker << raw
   end

+  def finish
+    @done = true
+  end
+
+  def broken?
+    @partial_json_tracker.broken?
+  end
+
   def read_buffered_property(prop_name)
     # Safeguard: If the model is misbehaving and generating something that's not a JSON,
     # treat response as a normal string.
     # This is a best-effort to recover from an unexpected scenario.
     if @partial_json_tracker.broken?
-      unread_chunk = @raw_response[@raw_cursor..]
-      @raw_cursor = @raw_response.length
-      return unread_chunk
+      if @done
+        return nil if @type_map[prop_name.to_sym].nil?
+        return(
+          DiscourseAi::Utils::BestEffortJsonParser.extract_key(
+            @raw_response,
+            @type_map[prop_name.to_sym],
+            prop_name,
+          )
+        )
+      else
+        return nil
+      end
     end

     # Maybe we haven't read that part of the JSON yet.

Review discussion on broken?:
Member: No need for this if we move the best effort parser inside this object.
Member (author): Actually we still need it, because on finish, when streaming, we need to double-check.
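Putting the new pieces together, the intended lifecycle looks roughly like this; a sketch that assumes the plugin environment, with illustrative chunk boundaries and values:

# Expecting {"output": <string>} per the persona's schema.
output = DiscourseAi::Completions::StructuredOutput.new({ output: { type: "string" } })

output << "{\"outp"
output << "ut\": \"Bonjou"
output.read_buffered_property(:output) # newly buffered text, e.g. "Bonjou"

output << "r\"}"
output.finish

# broken? is false for well-formed JSON; when true, the next read falls back
# to DiscourseAi::Utils::BestEffortJsonParser.extract_key on the raw response.
output.broken?

# output << "more" would now raise "Cannot append to a completed StructuredOutput".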
6 changes: 6 additions & 0 deletions lib/configuration/feature.rb
@@ -103,6 +103,12 @@ def ai_helper_features
           DiscourseAi::Configuration::Module::AI_HELPER_ID,
           DiscourseAi::Configuration::Module::AI_HELPER,
         ),
+        new(
+          "translator",
+          "ai_helper_translator_persona",
+          DiscourseAi::Configuration::Module::AI_HELPER_ID,
+          DiscourseAi::Configuration::Module::AI_HELPER,
+        ),
         new(
           "custom_prompt",
           "ai_helper_custom_prompt_persona",
7 changes: 4 additions & 3 deletions lib/personas/translator.rb
@@ -19,11 +19,12 @@ def system_prompt

         Format your response as a JSON object with a single key named "output", which has the translation as the value.
         Your output should be in the following format:
-        <output>
-        {"output": "xx"}
-        </output>
+
+        {"output": "xx"}

         Where "xx" is replaced by the translation.
+
+        reply with valid JSON only
       PROMPT
     end