Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 75fb371

Browse files
authored
FEATURE: Use personas for generating hypothetical posts (#1482)
* FEATURE: Use personas for generating hypothetical posts * Update prompt
1 parent 40fa527 commit 75fb371

File tree

15 files changed

+158
-30
lines changed

15 files changed

+158
-30
lines changed

config/locales/client.en.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,10 @@ en:
222222
name: "Search"
223223
description: "Enhances search experience by providing AI-generated answers to queries"
224224
discoveries: "Discoveries"
225+
embeddings:
226+
name: "Embeddings"
227+
description: "Powers features like Related Topics and AI Search by generating semantic representations of text"
228+
hyde: "HyDE"
225229
discord:
226230
name: "Discord integration"
227231
description: "Adds the ability to search Discord channels"

config/locales/server.en.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,9 @@ en:
394394
spam_detector:
395395
name: "Spam detector"
396396
description: "Default persona powering our Spam detection feature"
397+
content_creator:
398+
name: "Content creator"
399+
description: "Default persona powering HyDE search"
397400

398401
topic_not_found: "Summary unavailable, topic not found!"
399402
summarizing: "Summarizing topic"

config/settings.yml

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -222,34 +222,45 @@ discourse_ai:
222222
default: false
223223
client: true
224224
validator: "DiscourseAi::Configuration::EmbeddingsModuleValidator"
225+
area: "ai-features/embeddings"
225226
ai_embeddings_selected_model:
226227
type: enum
227228
default: ""
228229
allow_any: false
229230
enum: "DiscourseAi::Configuration::EmbeddingDefsEnumerator"
230231
validator: "DiscourseAi::Configuration::EmbeddingDefsValidator"
232+
area: "ai-features/embeddings"
231233
ai_embeddings_per_post_enabled:
232234
default: false
233235
hidden: true
234-
ai_embeddings_generate_for_pms: false
236+
ai_embeddings_generate_for_pms:
237+
default: false
238+
area: "ai-features/embeddings"
235239
ai_embeddings_semantic_related_topics_enabled:
236240
default: false
237241
client: true
238-
ai_embeddings_semantic_related_topics: 5
239-
ai_embeddings_semantic_related_include_closed_topics: true
242+
area: "ai-features/embeddings"
243+
ai_embeddings_semantic_related_topics:
244+
default: 5
245+
area: "ai-features/embeddings"
246+
ai_embeddings_semantic_related_include_closed_topics:
247+
default: true
248+
area: "ai-features/embeddings"
240249
ai_embeddings_backfill_batch_size:
241250
default: 250
242251
hidden: true
243252
ai_embeddings_semantic_search_enabled:
244253
default: false
245254
client: true
246255
validator: "DiscourseAi::Configuration::LlmDependencyValidator"
256+
area: "ai-features/embeddings"
247257
ai_embeddings_semantic_search_hyde_model:
248258
default: ""
249259
type: enum
250260
allow_any: false
251261
enum: "DiscourseAi::Configuration::LlmEnumerator"
252262
validator: "DiscourseAi::Configuration::LlmValidator"
263+
area: "ai-features/embeddings"
253264
ai_embeddings_semantic_search_hyde_model_allowed_seeded_models:
254265
default: ""
255266
hidden: true
@@ -259,6 +270,12 @@ discourse_ai:
259270
default: false
260271
client: true
261272
hidden: true
273+
area: "ai-features/embeddings"
274+
ai_embeddings_semantic_search_hyde_persona:
275+
default: "-32"
276+
type: enum
277+
enum: "DiscourseAi::Configuration::PersonaEnumerator"
278+
area: "ai-features/embeddings"
262279

263280
ai_embeddings_discourse_service_api_endpoint:
264281
default: ""

db/fixtures/personas/603_ai_personas.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ def from_setting(setting_name)
3636
setting_name = "ai_helper_custom_prompts_allowed_groups"
3737
default_groups = [Group::AUTO_GROUPS[:staff]]
3838
persona.allowed_group_ids = from_setting(setting_name) || default_groups
39+
elsif persona_class == DiscourseAi::Personas::ContentCreator
40+
persona.allowed_group_ids = [Group::AUTO_GROUPS[:everyone]]
3941
else
4042
persona.allowed_group_ids = [Group::AUTO_GROUPS[:trust_level_0]]
4143
end

lib/configuration/feature.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,17 @@ def spam_features
144144
]
145145
end
146146

147+
def embeddings_features
148+
feature_cache[:embeddings] ||= [
149+
new(
150+
"hyde",
151+
"ai_embeddings_semantic_search_hyde_persona",
152+
DiscourseAi::Configuration::Module::EMBEDDINGS_ID,
153+
DiscourseAi::Configuration::Module::EMBEDDINGS,
154+
),
155+
]
156+
end
157+
147158
def lookup_bot_persona_ids
148159
AiPersona
149160
.where(enabled: true)
@@ -196,6 +207,7 @@ def all
196207
translation_features,
197208
bot_features,
198209
spam_features,
210+
embeddings_features,
199211
].flatten
200212
end
201213

@@ -241,6 +253,8 @@ def llm_models
241253
DiscourseAi::AiHelper::Assistant.find_ai_helper_model(name, persona_klass)
242254
when DiscourseAi::Configuration::Module::TRANSLATION
243255
DiscourseAi::Translation::BaseTranslator.preferred_llm_model(persona_klass)
256+
when DiscourseAi::Configuration::Module::EMBEDDINGS
257+
DiscourseAi::Embeddings::SemanticSearch.new(nil).find_ai_hyde_model(persona_klass)
244258
end
245259

246260
if llm_model.blank? && persona.default_llm_id

lib/configuration/module.rb

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,19 @@ class Module
1111
TRANSLATION = "translation"
1212
BOT = "bot"
1313
SPAM = "spam"
14+
EMBEDDINGS = "embeddings"
1415

15-
NAMES = [SUMMARIZATION, SEARCH, DISCORD, INFERENCE, AI_HELPER, TRANSLATION, BOT, SPAM].freeze
16+
NAMES = [
17+
SUMMARIZATION,
18+
SEARCH,
19+
DISCORD,
20+
INFERENCE,
21+
AI_HELPER,
22+
TRANSLATION,
23+
BOT,
24+
SPAM,
25+
EMBEDDINGS,
26+
].freeze
1627

1728
SUMMARIZATION_ID = 1
1829
SEARCH_ID = 2
@@ -22,6 +33,7 @@ class Module
2233
TRANSLATION_ID = 6
2334
BOT_ID = 7
2435
SPAM_ID = 8
36+
EMBEDDINGS_ID = 9
2537

2638
class << self
2739
def all
@@ -75,6 +87,13 @@ def all
7587
enabled_by_setting: "ai_spam_detection_enabled",
7688
features: DiscourseAi::Configuration::Feature.spam_features,
7789
),
90+
new(
91+
EMBEDDINGS_ID,
92+
EMBEDDINGS,
93+
enabled_by_setting: "ai_embeddings_enabled",
94+
features: DiscourseAi::Configuration::Feature.embeddings_features,
95+
extra_check: -> { SiteSetting.ai_embeddings_semantic_search_enabled },
96+
),
7897
]
7998
end
8099

lib/embeddings/semantic_search.rb

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,9 @@ def search_for_topics(query, page = 1, hyde: true)
7878
return Post.none
7979
end
8080

81-
search_embedding = hyde ? hyde_embedding(search_term) : embedding(search_term)
81+
search_embedding = nil
82+
search_embedding = hyde_embedding(search_term) if hyde
83+
search_embedding = embedding(search_term) if search_embedding.blank?
8284

8385
over_selection_limit = limit * OVER_SELECTION_FACTOR
8486

@@ -176,26 +178,47 @@ def quick_search(query)
176178
end
177179

178180
def hypothetical_post_from(search_term)
179-
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
180-
You are a content creator for a forum. The forum description is as follows:
181-
#{SiteSetting.title}
182-
#{SiteSetting.site_description}
181+
context =
182+
DiscourseAi::Personas::BotContext.new(
183+
user: @guardian.user,
184+
skip_tool_details: true,
185+
feature_name: "semantic_search_hyde",
186+
messages: [{ type: :user, content: search_term }],
187+
)
183188

184-
Put the forum post between <ai></ai> tags.
185-
TEXT
189+
bot = build_bot(@guardian.user)
190+
return nil if bot.nil?
186191

187-
prompt.push(type: :user, content: <<~TEXT.strip)
188-
Using this description, write a forum post about the subject inside the <input></input> XML tags:
192+
structured_output = nil
193+
raw_response = +""
194+
hyde_schema_key = bot.persona.response_format&.first.to_h
189195

190-
<input>#{search_term}</input>
191-
TEXT
196+
buffer_blk =
197+
Proc.new do |partial, _, type|
198+
if type == :structured_output
199+
structured_output = partial
200+
elsif type.blank?
201+
# Assume response is a regular completion.
202+
raw_response << partial
203+
end
204+
end
192205

193-
llm_response =
194-
DiscourseAi::Completions::Llm.proxy(
195-
SiteSetting.ai_embeddings_semantic_search_hyde_model,
196-
).generate(prompt, user: @guardian.user, feature_name: "semantic_search_hyde")
206+
bot.reply(context, &buffer_blk)
207+
208+
structured_output&.read_buffered_property(hyde_schema_key["key"]&.to_sym) || raw_response
209+
end
210+
211+
# Priorities are:
212+
# 1. Persona's default LLM
213+
# 2. `ai_embeddings_semantic_search_hyde_model` setting.
214+
def find_ai_hyde_model(persona_klass)
215+
model_id =
216+
persona_klass.default_llm_id ||
217+
SiteSetting.ai_embeddings_semantic_search_hyde_model&.split(":")&.last
197218

198-
Nokogiri::HTML5.fragment(llm_response).at("ai")&.text.presence || llm_response
219+
return if model_id.blank?
220+
221+
LlmModel.find_by(id: model_id)
199222
end
200223

201224
private
@@ -209,6 +232,18 @@ def build_hyde_key(digest, hyde_model)
209232
def build_embedding_key(digest, hyde_model, embedding_model)
210233
"#{build_hyde_key(digest, hyde_model)}-#{embedding_model}"
211234
end
235+
236+
def build_bot(user)
237+
persona_id = SiteSetting.ai_embeddings_semantic_search_hyde_persona
238+
239+
persona_klass = AiPersona.find_by(id: persona_id)&.class_instance
240+
return if persona_klass.nil?
241+
242+
llm_model = find_ai_hyde_model(persona_klass)
243+
return if llm_model.nil?
244+
245+
DiscourseAi::Personas::Bot.as(user, persona: persona_klass.new, model: llm_model)
246+
end
212247
end
213248
end
214249
end

lib/personas/bot.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def reply(context, llm_args: {}, &update_blk)
171171
text = +""
172172
result.each { |item| text << item if item.is_a?(String) }
173173
end
174-
raw_context << [text, bot_user.username]
174+
raw_context << [text, bot_user&.username]
175175
end
176176

177177
total_completions += 1

lib/personas/content_creator.rb

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
4+
module Personas
5+
class ContentCreator < Persona
6+
def self.default_enabled
7+
false
8+
end
9+
10+
def system_prompt
11+
<<~PROMPT.strip
12+
You are a content creator for a forum. The forum title and description is as follows:
13+
* Ttitle: {site_title}
14+
* Description: {site_description}
15+
16+
You will receive a couple of keywords and must create a post about the keywords, keeping the previous information in mind.
17+
18+
Format your response as a JSON object with a single key named "output", which has the created content.
19+
Your output should be in the following format:
20+
<output>
21+
{"output": "xx"}
22+
</output>
23+
24+
Where "xx" is replaced by the content.
25+
PROMPT
26+
end
27+
28+
def response_format
29+
[{ "key" => "output", "type" => "string" }]
30+
end
31+
end
32+
end
33+
end

lib/personas/persona.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def system_personas
6969
TopicTitleTranslator => -29,
7070
ShortTextTranslator => -30,
7171
SpamDetector => -31,
72+
ContentCreator => -32,
7273
}
7374
end
7475

0 commit comments

Comments
 (0)