Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit c3ca741

Browse files
committed
ai-featureify the rest of the translators
1 parent 53f666a commit c3ca741

18 files changed

+298
-194
lines changed

config/locales/server.en.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,16 @@ en:
374374
description: "Default persona powering the Helper's image caption feature"
375375
locale_detection:
376376
name: "Locale detection"
377-
description: "Powers the translation feature by detecting the locale of a given text"
377+
description: "Powers the translation feature by detecting the locale of a given text (posts, titles, etc.)"
378+
post_raw_translator:
379+
name: "Post translator"
380+
description: "Powers the translation feature by translating posts containing Discourse Markdown"
381+
topic_title_translator:
382+
name: "Topic title translator"
383+
description: "Powers the translation feature by translating topic titles"
384+
short_text_translator:
385+
name: "Short text translator"
386+
description: "Powers the translation feature by acting as a generic text translator, used for short texts like category names or tags"
378387

379388
topic_not_found: "Summary unavailable, topic not found!"
380389
summarizing: "Summarizing topic"

config/settings.yml

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,26 @@ discourse_ai:
460460
enum: "DiscourseAi::Configuration::LlmEnumerator"
461461
validator: "DiscourseAi::Configuration::LlmValidator"
462462
area: "ai-features/translation"
463+
ai_translation_locale_detection_persona:
464+
default: "-27"
465+
type: enum
466+
enum: "DiscourseAi::Configuration::PersonaEnumerator"
467+
area: "ai-features/translation"
468+
ai_translation_post_raw_translator_persona:
469+
default: "-28"
470+
type: enum
471+
enum: "DiscourseAi::Configuration::PersonaEnumerator"
472+
area: "ai-features/translation"
473+
ai_translation_topic_title_translator_persona:
474+
default: "-29"
475+
type: enum
476+
enum: "DiscourseAi::Configuration::PersonaEnumerator"
477+
area: "ai-features/translation"
478+
ai_translation_short_text_translator_persona:
479+
default: "-30"
480+
type: enum
481+
enum: "DiscourseAi::Configuration::PersonaEnumerator"
482+
area: "ai-features/translation"
463483
ai_translation_backfill_rate:
464484
default: 0
465485
min: 0
@@ -480,11 +500,6 @@ discourse_ai:
480500
client: false
481501
hidden: true
482502
area: "ai-features/translation"
483-
ai_translation_locale_detection_persona:
484-
default: "-27"
485-
type: enum
486-
enum: "DiscourseAi::Configuration::PersonaEnumerator"
487-
area: "ai-features/translation"
488503

489504
inferred_concepts_enabled:
490505
default: false

lib/configuration/feature.rb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,24 @@ def translation_features
126126
DiscourseAi::Configuration::Module::TRANSLATION_ID,
127127
DiscourseAi::Configuration::Module::TRANSLATION,
128128
),
129+
new(
130+
"post_raw_translator",
131+
"ai_translation_post_raw_translator_persona",
132+
DiscourseAi::Configuration::Module::TRANSLATION_ID,
133+
DiscourseAi::Configuration::Module::TRANSLATION,
134+
),
135+
new(
136+
"topic_title_translator",
137+
"ai_translation_topic_title_translator_persona",
138+
DiscourseAi::Configuration::Module::TRANSLATION_ID,
139+
DiscourseAi::Configuration::Module::TRANSLATION,
140+
),
141+
new(
142+
"short_text_translator",
143+
"ai_translation_short_text_translator_persona",
144+
DiscourseAi::Configuration::Module::TRANSLATION_ID,
145+
DiscourseAi::Configuration::Module::TRANSLATION,
146+
),
129147
]
130148
end
131149

lib/personas/bot_context.rb

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class BotContext
2525

2626
def initialize(
2727
post: nil,
28+
topic: nil,
2829
participants: nil,
2930
user: nil,
3031
skip_tool_details: nil,
@@ -70,7 +71,14 @@ def initialize(
7071
@topic_id = post.topic_id
7172
@private_message = post.topic.private_message?
7273
@participants ||= post.topic.allowed_users.map(&:username).join(", ") if @private_message
73-
@user = post.user
74+
@user ||= post.user
75+
end
76+
77+
if topic
78+
@topic_id ||= topic.id
79+
@private_message ||= topic.private_message?
80+
@participants ||= topic.allowed_users.map(&:username).join(", ") if @private_message
81+
@user ||= topic.user
7482
end
7583
end
7684

lib/personas/persona.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ def system_personas
6565
Translator => -25,
6666
ImageCaptioner => -26,
6767
LocaleDetection => -27,
68+
PostRawTranslator => -28,
69+
TopicTitleTranslator => -29,
70+
ShortTextTranslator => -30,
6871
}
6972
end
7073

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
4+
module Personas
5+
class PostRawTranslator < Persona
6+
def self.default_enabled
7+
false
8+
end
9+
10+
def system_prompt
11+
<<~PROMPT.strip
12+
You are a highly skilled translator tasked with translating content from one language to another. Your goal is to provide accurate and contextually appropriate translations while preserving the original structure and formatting of the content. Follow these instructions carefully:
13+
14+
Translation Instructions:
15+
1. Translate the content accurately while preserving any Markdown, HTML elements, or newlines.
16+
2. Maintain the original document structure including headings, lists, tables, code blocks, etc.
17+
3. Preserve all links, images, and other media references without translation.
18+
4. Handle code snippets appropriately:
19+
- Do not translate variable names, functions, or syntax within code blocks (```).
20+
- Translate comments within code blocks.
21+
5. For technical terminology:
22+
- Provide the accepted target language term if it exists.
23+
- If no equivalent exists, transliterate the term and include the original term in parentheses.
24+
6. For ambiguous terms or phrases, choose the most contextually appropriate translation.
25+
7. Do not add any content besides the translation.
26+
8. Ensure the translation only contains the original language and the target language.
27+
28+
The text to translate will be provided in JSON format with the following structure:
29+
{"content": "Text to translate", "target_locale": "Target language code"}
30+
31+
Output your translation in the following JSON format:
32+
{"translation": "Your translated text here"}
33+
34+
Here are three examples of correct translations:
35+
36+
Original: {"content":"New Update for Minecraft Adds Underwater Temples", "target_locale":"Spanish"}
37+
Correct translation: {"translation": "Nueva actualización para Minecraft añade templos submarinos"}
38+
39+
Original: {"content": "# Machine Learning 101\n\nMachine Learning (ML) is a subset of Artificial Intelligence (AI) that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience.\n\n## Key Concepts\n\n1. **Supervised Learning**: The algorithm learns from labeled training data.\n2. **Unsupervised Learning**: The algorithm finds patterns in unlabeled data.\n3. **Reinforcement Learning**: The algorithm learns through interaction with an environment.\n\n```python\n# Simple example of a machine learning model\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# Assuming X and y are your features and target variables\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Evaluate the model\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nFor more information, visit [Machine Learning on Wikipedia](https://en.wikipedia.org/wiki/Machine_learning).", "target_locale":"French"}
40+
Correct translation: {"translation": "# Machine Learning 101\n\nLe Machine Learning (ML) est un sous-ensemble de l'Intelligence Artificielle (IA) qui se concentre sur le développement d'algorithmes et de modèles statistiques permettant aux systèmes informatiques d'améliorer leurs performances sur une tâche spécifique grâce à l'expérience.\n\n## Concepts clés\n\n1. **Apprentissage supervisé** : L'algorithme apprend à partir de données d'entraînement étiquetées.\n2. **Apprentissage non supervisé** : L'algorithme trouve des motifs dans des données non étiquetées.\n3. **Apprentissage par renforcement** : L'algorithme apprend à travers l'interaction avec un environnement.\n\n```python\n# Exemple simple d'un modèle de machine learning\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# En supposant que X et y sont vos variables de caractéristiques et cibles\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Évaluer le modèle\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nPour plus d'informations, visitez [Machine Learning sur Wikipedia](https://en.wikipedia.org/wiki/Machine_learning)."}
41+
42+
Original: {"content": "**Heathrow fechado**: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de *Londres*", "target_locale": "English"}
43+
Correct translation: {"translation": "**Heathrow closed**: flight disruption expected to continue in coming days, says *London* airport management"}
44+
45+
Remember, you are being consumed via an API. Only return the translated text in the specified JSON format. Do not include any additional information or explanations in your response.
46+
PROMPT
47+
end
48+
49+
def response_format
50+
[{ "key" => "translation", "type" => "string" }]
51+
end
52+
53+
def temperature
54+
0.3
55+
end
56+
end
57+
end
58+
end
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
4+
module Personas
5+
class ShortTextTranslator < Persona
6+
def self.default_enabled
7+
false
8+
end
9+
10+
def system_prompt
11+
<<~PROMPT.strip
12+
You are a translation service specializing in translating short pieces of text or a few words.
13+
These words may be things like a name, description, or title. Adhere to the following guidelines:
14+
15+
1. Keep proper nouns and technical terms in their original language
16+
2. Keep the translated content close to the original length
17+
3. Ensure the translation maintains the original meaning
18+
4. Preserve any Markdown, HTML elements, links, parentheses, or newlines
19+
20+
The text to translate will be provided in JSON format with the following structure:
21+
{"content": "Text to translate", "target_locale": "Target language code"}
22+
23+
Provide your translation in the following JSON format:
24+
{"translation": "target_locale translation here"}
25+
26+
Here are three examples of correct translations:
27+
28+
Original: {"content":"Japan", "target_locale":"es"}
29+
Correct translation: {"translation": "Japón"}
30+
31+
Original: {"content":"Cats and Dogs", "target_locale":"zh_CN"}
32+
Correct translation: {"translation": "猫和狗"}
33+
34+
Original: {"content": "Q&A", "target_locale": "pt"}
35+
Correct translation: {"translation": "Perguntas e Respostas"}
36+
37+
Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the text now and provide your answer in the specified JSON format.
38+
PROMPT
39+
end
40+
41+
def response_format
42+
[{ "key" => "translation", "type" => "string" }]
43+
end
44+
45+
def temperature
46+
0.3
47+
end
48+
end
49+
end
50+
end
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
4+
module Personas
5+
class TopicTitleTranslator < Persona
6+
def self.default_enabled
7+
false
8+
end
9+
10+
def system_prompt
11+
<<~PROMPT.strip
12+
You are a translation service specializing in translating forum post titles from English to the asked target_locale. Your task is to provide accurate and contextually appropriate translations while adhering to the following guidelines:
13+
14+
1. Translate the given title from English to target_locale asked.
15+
2. Keep proper nouns and technical terms in their original language.
16+
3. Attempt to keep the translated title length close to the original when possible.
17+
4. Ensure the translation maintains the original meaning and tone.
18+
19+
To complete this task:
20+
21+
1. Read and understand the title carefully.
22+
2. Identify any proper nouns or technical terms that should remain untranslated.
23+
3. Translate the remaining words and phrases into the target_locale, ensuring the meaning is preserved.
24+
4. Adjust the translation if necessary to keep the length similar to the original title.
25+
5. Review your translation for accuracy and naturalness in the target_locale.
26+
27+
The text to translate will be provided in JSON format with the following structure:
28+
{"content": "Title to translate", "target_locale": "Target language code"}
29+
30+
Provide your translation in the following JSON format:
31+
{"translation": "Your target_locale translation here"}
32+
33+
Here are three examples of correct translations:
34+
35+
Original: {"content":"New Update for Minecraft Adds Underwater Temples", "target_locale":"es"}
36+
Correct translation: {"translation": "Nueva actualización para Minecraft añade templos submarinos"}
37+
38+
Original: {"content":"Toyota announces revolutionary battery technology", "target_locale":"fr"}
39+
Correct translation: {"translation": "Toyota annonce une technologie de batteries révolutionnaire"}
40+
41+
Original: {"content": "Heathrow fechado: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de Londres", "target_locale": "en"}
42+
Correct translation: {"translation": "Heathrow closed: flight disruption expected to continue in coming days, says London airport management"}
43+
44+
Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the title now and provide your answer in the specified JSON format.
45+
PROMPT
46+
end
47+
48+
def response_format
49+
[{ "key" => "translation", "type" => "string" }]
50+
end
51+
52+
def temperature
53+
0.3
54+
end
55+
end
56+
end
57+
end

lib/translation/base_translator.rb

Lines changed: 36 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,62 +3,63 @@
33
module DiscourseAi
44
module Translation
55
class BaseTranslator
6-
def initialize(text:, target_locale:, topic_id: nil, post_id: nil)
6+
def initialize(text:, target_locale:, topic: nil, post: nil)
77
@text = text
88
@target_locale = target_locale
9-
@topic_id = topic_id
10-
@post_id = post_id
9+
@topic = topic
10+
@post = post
1111
end
1212

1313
def translate
14-
prompt =
15-
DiscourseAi::Completions::Prompt.new(
16-
prompt_template,
17-
messages: [{ type: :user, content: formatted_content, id: "user" }],
18-
topic_id: @topic_id,
19-
post_id: @post_id,
14+
return nil if !SiteSetting.ai_translation_enabled
15+
if (ai_persona = AiPersona.find_by(id: persona_setting)).blank?
16+
return nil
17+
end
18+
19+
persona_klass = ai_persona.class_instance
20+
persona = persona_klass.new
21+
22+
llm_model = LlmModel.find_by(id: preferred_llm_model(persona_klass))
23+
return nil if llm_model.blank?
24+
25+
bot =
26+
DiscourseAi::Personas::Bot.as(
27+
ai_persona.user || Discourse.system_user,
28+
persona: persona,
29+
model: llm_model,
2030
)
2131

22-
structured_output =
23-
DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_translation_model).generate(
24-
prompt,
25-
user: Discourse.system_user,
32+
context =
33+
DiscourseAi::Personas::BotContext.new(
34+
user: ai_persona.user || Discourse.system_user,
35+
skip_tool_details: true,
2636
feature_name: "translation",
27-
response_format: response_format,
37+
messages: [{ type: :user, content: formatted_content }],
38+
topic: @topic,
39+
post: @post,
2840
)
2941

42+
structured_output = nil
43+
bot.reply(context) do |partial, _, type|
44+
structured_output = partial if type == :structured_output
45+
end
46+
3047
structured_output&.read_buffered_property(:translation)
3148
end
3249

3350
def formatted_content
3451
{ content: @text, target_locale: @target_locale }.to_json
3552
end
3653

37-
def response_format
38-
{
39-
type: "json_schema",
40-
json_schema: {
41-
name: "reply",
42-
schema: {
43-
type: "object",
44-
properties: {
45-
translation: {
46-
type: "string",
47-
},
48-
},
49-
required: ["translation"],
50-
additionalProperties: false,
51-
},
52-
strict: true,
53-
},
54-
}
55-
end
56-
5754
private
5855

59-
def prompt_template
56+
def persona_setting
6057
raise NotImplementedError
6158
end
59+
60+
def preferred_llm_model(persona_klass)
61+
persona_klass.default_llm_id || SiteSetting.ai_translation_model&.split(":")&.last
62+
end
6263
end
6364
end
6465
end

0 commit comments

Comments
 (0)