4 changes: 4 additions & 0 deletions config/locales/client.en.yml
@@ -222,6 +222,10 @@ en:
name: "Search"
description: "Enhances search experience by providing AI-generated answers to queries"
discoveries: "Discoveries"
embeddings:
name: "Embeddings"
description: "Powers features like Related Topics and AI Search by generating semantic representations of text"
hyde: "HyDE"
discord:
name: "Discord integration"
description: "Adds the ability to search Discord channels"
3 changes: 3 additions & 0 deletions config/locales/server.en.yml
@@ -394,6 +394,9 @@ en:
spam_detector:
name: "Spam detector"
description: "Default persona powering our Spam detection feature"
content_creator:
name: "Content creator"
description: "Default persona powering HyDE search"

topic_not_found: "Summary unavailable, topic not found!"
summarizing: "Summarizing topic"
23 changes: 20 additions & 3 deletions config/settings.yml
@@ -222,34 +222,45 @@ discourse_ai:
default: false
client: true
validator: "DiscourseAi::Configuration::EmbeddingsModuleValidator"
area: "ai-features/embeddings"
ai_embeddings_selected_model:
type: enum
default: ""
allow_any: false
enum: "DiscourseAi::Configuration::EmbeddingDefsEnumerator"
validator: "DiscourseAi::Configuration::EmbeddingDefsValidator"
area: "ai-features/embeddings"
ai_embeddings_per_post_enabled:
default: false
hidden: true
ai_embeddings_generate_for_pms: false
ai_embeddings_generate_for_pms:
default: false
area: "ai-features/embeddings"
ai_embeddings_semantic_related_topics_enabled:
default: false
client: true
ai_embeddings_semantic_related_topics: 5
ai_embeddings_semantic_related_include_closed_topics: true
area: "ai-features/embeddings"
ai_embeddings_semantic_related_topics:
default: 5
area: "ai-features/embeddings"
ai_embeddings_semantic_related_include_closed_topics:
default: true
area: "ai-features/embeddings"
ai_embeddings_backfill_batch_size:
default: 250
hidden: true
ai_embeddings_semantic_search_enabled:
default: false
client: true
validator: "DiscourseAi::Configuration::LlmDependencyValidator"
area: "ai-features/embeddings"
ai_embeddings_semantic_search_hyde_model:
default: ""
type: enum
allow_any: false
enum: "DiscourseAi::Configuration::LlmEnumerator"
validator: "DiscourseAi::Configuration::LlmValidator"
area: "ai-features/embeddings"
ai_embeddings_semantic_search_hyde_model_allowed_seeded_models:
default: ""
hidden: true
@@ -259,6 +270,12 @@ discourse_ai:
default: false
client: true
hidden: true
area: "ai-features/embeddings"
ai_embeddings_semantic_search_hyde_persona:
default: "-32"
type: enum
enum: "DiscourseAi::Configuration::PersonaEnumerator"
area: "ai-features/embeddings"

ai_embeddings_discourse_service_api_endpoint:
default: ""
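Note: the new `ai_embeddings_semantic_search_hyde_persona` setting stores a persona id as an enum string, defaulting to `"-32"` — the seeded Content creator persona added later in this diff. A minimal console-style sketch of how that id resolves to a persona class (mirroring `build_bot` further down; not part of the PR):

```ruby
# Resolving the persona id stored in the new enum setting.
# "-32" is the seeded ContentCreator system persona, assuming the default is unchanged.
persona_id = SiteSetting.ai_embeddings_semantic_search_hyde_persona # => "-32"
persona_klass = AiPersona.find_by(id: persona_id)&.class_instance
```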
2 changes: 2 additions & 0 deletions db/fixtures/personas/603_ai_personas.rb
@@ -36,6 +36,8 @@ def from_setting(setting_name)
setting_name = "ai_helper_custom_prompts_allowed_groups"
default_groups = [Group::AUTO_GROUPS[:staff]]
persona.allowed_group_ids = from_setting(setting_name) || default_groups
elsif persona_class == DiscourseAi::Personas::ContentCreator
persona.allowed_group_ids = [Group::AUTO_GROUPS[:everyone]]
else
persona.allowed_group_ids = [Group::AUTO_GROUPS[:trust_level_0]]
end
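The fixture branch above seeds the new persona with the `everyone` auto group, since it only powers backend HyDE generation rather than a user-facing bot. A sketch of the resulting record, assuming the default seeded id of -32:

```ruby
# Seeded ContentCreator persona: visible to everyone.
persona = AiPersona.find_by(id: -32)
persona.allowed_group_ids # => [Group::AUTO_GROUPS[:everyone]]
```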
14 changes: 14 additions & 0 deletions lib/configuration/feature.rb
@@ -144,6 +144,17 @@ def spam_features
]
end

def embeddings_features
feature_cache[:embeddings] ||= [
new(
"hyde",
"ai_embeddings_semantic_search_hyde_persona",
DiscourseAi::Configuration::Module::EMBEDDINGS_ID,
DiscourseAi::Configuration::Module::EMBEDDINGS,
),
]
end

def lookup_bot_persona_ids
AiPersona
.where(enabled: true)
@@ -196,6 +207,7 @@ def all
translation_features,
bot_features,
spam_features,
embeddings_features,
].flatten
end

@@ -241,6 +253,8 @@ def llm_models
DiscourseAi::AiHelper::Assistant.find_ai_helper_model(name, persona_klass)
when DiscourseAi::Configuration::Module::TRANSLATION
DiscourseAi::Translation::BaseTranslator.preferred_llm_model(persona_klass)
when DiscourseAi::Configuration::Module::EMBEDDINGS
DiscourseAi::Embeddings::SemanticSearch.new(nil).find_ai_hyde_model(persona_klass)
end

if llm_model.blank? && persona.default_llm_id
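With the `EMBEDDINGS` branch added to `llm_models`, feature introspection can report which LLM the HyDE feature will actually use. A console-style sketch of that lookup path (assumes embeddings are configured; not part of the PR):

```ruby
# The EMBEDDINGS branch delegates model resolution to
# SemanticSearch#find_ai_hyde_model, which prefers the persona's default LLM
# and falls back to the ai_embeddings_semantic_search_hyde_model setting.
persona_klass = AiPersona.find_by(id: -32).class_instance
llm_model = DiscourseAi::Embeddings::SemanticSearch.new(nil).find_ai_hyde_model(persona_klass)
llm_model # => an LlmModel record, or nil if neither source is configured
```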
21 changes: 20 additions & 1 deletion lib/configuration/module.rb
@@ -11,8 +11,19 @@ class Module
TRANSLATION = "translation"
BOT = "bot"
SPAM = "spam"
EMBEDDINGS = "embeddings"

NAMES = [SUMMARIZATION, SEARCH, DISCORD, INFERENCE, AI_HELPER, TRANSLATION, BOT, SPAM].freeze
NAMES = [
SUMMARIZATION,
SEARCH,
DISCORD,
INFERENCE,
AI_HELPER,
TRANSLATION,
BOT,
SPAM,
EMBEDDINGS,
].freeze

SUMMARIZATION_ID = 1
SEARCH_ID = 2
@@ -22,6 +33,7 @@ class Module
TRANSLATION_ID = 6
BOT_ID = 7
SPAM_ID = 8
EMBEDDINGS_ID = 9

class << self
def all
@@ -75,6 +87,13 @@ def all
enabled_by_setting: "ai_spam_detection_enabled",
features: DiscourseAi::Configuration::Feature.spam_features,
),
new(
EMBEDDINGS_ID,
EMBEDDINGS,
enabled_by_setting: "ai_embeddings_enabled",
features: DiscourseAi::Configuration::Feature.embeddings_features,
extra_check: -> { SiteSetting.ai_embeddings_semantic_search_enabled },
),
]
end

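The embeddings module registers with id 9 and, unlike most modules here, carries an `extra_check`. A sketch of the effective gate, assuming the module framework ANDs `enabled_by_setting` with the lambda:

```ruby
# Both settings must be on for the embeddings module to count as enabled.
embeddings_enabled =
  SiteSetting.ai_embeddings_enabled && SiteSetting.ai_embeddings_semantic_search_enabled
```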
67 changes: 51 additions & 16 deletions lib/embeddings/semantic_search.rb
@@ -78,7 +78,9 @@ def search_for_topics(query, page = 1, hyde: true)
return Post.none
end

search_embedding = hyde ? hyde_embedding(search_term) : embedding(search_term)
search_embedding = nil
search_embedding = hyde_embedding(search_term) if hyde
search_embedding = embedding(search_term) if search_embedding.blank?

over_selection_limit = limit * OVER_SELECTION_FACTOR

@@ -176,26 +178,47 @@ def quick_search(query)
end

def hypothetical_post_from(search_term)
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a content creator for a forum. The forum description is as follows:
#{SiteSetting.title}
#{SiteSetting.site_description}
context =
DiscourseAi::Personas::BotContext.new(
user: @guardian.user,
skip_tool_details: true,
feature_name: "semantic_search_hyde",
messages: [{ type: :user, content: search_term }],
)

Put the forum post between <ai></ai> tags.
TEXT
bot = build_bot(@guardian.user)
return nil if bot.nil?

prompt.push(type: :user, content: <<~TEXT.strip)
Using this description, write a forum post about the subject inside the <input></input> XML tags:
structured_output = nil
raw_response = +""
hyde_schema_key = bot.persona.response_format&.first.to_h

<input>#{search_term}</input>
TEXT
buffer_blk =
Proc.new do |partial, _, type|
if type == :structured_output
structured_output = partial
elsif type.blank?
# Assume response is a regular completion.
raw_response << partial
end
end

llm_response =
DiscourseAi::Completions::Llm.proxy(
SiteSetting.ai_embeddings_semantic_search_hyde_model,
).generate(prompt, user: @guardian.user, feature_name: "semantic_search_hyde")
bot.reply(context, &buffer_blk)

structured_output&.read_buffered_property(hyde_schema_key["key"]&.to_sym) || raw_response
end

# Priorities are:
# 1. Persona's default LLM
# 2. `ai_embeddings_semantic_search_hyde_model` setting.
def find_ai_hyde_model(persona_klass)
model_id =
persona_klass.default_llm_id ||
SiteSetting.ai_embeddings_semantic_search_hyde_model&.split(":")&.last

Nokogiri::HTML5.fragment(llm_response).at("ai")&.text.presence || llm_response
return if model_id.blank?

LlmModel.find_by(id: model_id)
end

private
@@ -209,6 +232,18 @@ def build_hyde_key(digest, hyde_model)
def build_embedding_key(digest, hyde_model, embedding_model)
"#{build_hyde_key(digest, hyde_model)}-#{embedding_model}"
end

def build_bot(user)
persona_id = SiteSetting.ai_embeddings_semantic_search_hyde_persona

persona_klass = AiPersona.find_by(id: persona_id)&.class_instance
return if persona_klass.nil?

llm_model = find_ai_hyde_model(persona_klass)
return if llm_model.nil?

DiscourseAi::Personas::Bot.as(user, persona: persona_klass.new, model: llm_model)
end
end
end
end
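`hypothetical_post_from` now routes through a persona bot instead of a hand-built prompt, preferring structured output and falling back to the raw completion text. A usage sketch from the caller's side (assumes a configured HyDE persona and model; example query is illustrative):

```ruby
# HyDE search entry point: generates a hypothetical post for the query,
# embeds it, and searches topics by embedding similarity.
search = DiscourseAi::Embeddings::SemanticSearch.new(Guardian.new(user))
posts = search.search_for_topics("ruby garbage collection tuning", 1, hyde: true)
# Per the two-step assignment above, if the HyDE embedding comes back blank
# (e.g. build_bot returns nil), the raw query is embedded instead.
```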
2 changes: 1 addition & 1 deletion lib/personas/bot.rb
@@ -171,7 +171,7 @@ def reply(context, llm_args: {}, &update_blk)
text = +""
result.each { |item| text << item if item.is_a?(String) }
end
raw_context << [text, bot_user.username]
raw_context << [text, bot_user&.username]
end

total_completions += 1
33 changes: 33 additions & 0 deletions lib/personas/content_creator.rb
@@ -0,0 +1,33 @@
# frozen_string_literal: true

module DiscourseAi
module Personas
class ContentCreator < Persona
def self.default_enabled
false
end

def system_prompt
<<~PROMPT.strip
You are a content creator for a forum. The forum title and description is as follows:
* Title: {site_title}
* Description: {site_description}

You will receive a couple of keywords and must create a post about the keywords, keeping the previous information in mind.

Format your response as a JSON object with a single key named "output", which has the created content.
Your output should be in the following format:
<output>
{"output": "xx"}
</output>

Where "xx" is replaced by the content.
PROMPT
end

def response_format
[{ "key" => "output", "type" => "string" }]
end
end
end
end
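The persona's `response_format` declares a single string key, `output`, which is what `read_buffered_property(hyde_schema_key["key"]&.to_sym)` in `semantic_search.rb` reads back. A sketch of the reply shape the system prompt asks the model for (assumed example content):

```ruby
# Expected model reply: JSON wrapped in <output> tags, with the "output" key
# matching the persona's response_format.
expected_reply = <<~REPLY
  <output>
  {"output": "A short forum post written around the supplied keywords."}
  </output>
REPLY
```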
1 change: 1 addition & 0 deletions lib/personas/persona.rb
@@ -69,6 +69,7 @@ def system_personas
TopicTitleTranslator => -29,
ShortTextTranslator => -30,
SpamDetector => -31,
ContentCreator => -32,
}
end

8 changes: 4 additions & 4 deletions spec/lib/modules/embeddings/semantic_search_spec.rb
@@ -27,7 +27,7 @@ def insert_candidate(candidate)
end

def trigger_search(query)
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{hypothetical_post}</ai>"]) do
DiscourseAi::Completions::Llm.with_prepared_responses([hypothetical_post]) do
subject.search_for_topics(query)
end
end
@@ -123,9 +123,9 @@ def trigger_search(query)
context "while searching as anon" do
it "returns an empty list" do
posts =
DiscourseAi::Completions::Llm.with_prepared_responses(
["<ai>#{hypothetical_post}</ai>"],
) { described_class.new(Guardian.new(nil)).search_for_topics(query) }
DiscourseAi::Completions::Llm.with_prepared_responses([hypothetical_post]) do
described_class.new(Guardian.new(nil)).search_for_topics(query)
end

expect(posts).to be_empty
end
4 changes: 2 additions & 2 deletions spec/lib/personas/tools/search_spec.rb
@@ -125,7 +125,7 @@
DiscourseAi::Embeddings::Schema.for(Topic).store(post1.topic, hyde_embedding, "digest")

results =
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
search.invoke(&progress_blk)
end

Expand All @@ -144,7 +144,7 @@

# results will be expanded by semantic search, but it will find nothing
results =
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
search.invoke(&progress_blk)
end

2 changes: 1 addition & 1 deletion spec/lib/utils/search_spec.rb
@@ -154,7 +154,7 @@

# Using a completely different search query, should still find via semantic search
results =
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
described_class.perform_search(
search_query: "totally different query",
current_user: admin,
2 changes: 1 addition & 1 deletion spec/requests/admin/ai_features_controller_spec.rb
@@ -19,7 +19,7 @@
get "/admin/plugins/discourse-ai/ai-features.json"

expect(response.status).to eq(200)
expect(response.parsed_body["ai_features"].count).to eq(8)
expect(response.parsed_body["ai_features"].count).to eq(9)
end
end

2 changes: 1 addition & 1 deletion spec/system/admin_ai_features_spec.rb
@@ -28,7 +28,7 @@
ai_features_page.toggle_unconfigured

# this changes as we add more AI features
expect(ai_features_page).to have_listed_modules(7)
expect(ai_features_page).to have_listed_modules(8)
end

it "lists the persona used for the corresponding AI feature" do