Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit b5e8277

Browse files
authored
DEV: Move AI translation feature into an AI Feature (#1424)
This PR moves translations into an AI Feature. See #1424 for screenshots.
1 parent 9be1049 commit b5e8277

24 files changed

+437
-257
lines changed

config/locales/client.en.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,15 @@ en:
227227
custom_prompt: "Custom prompt"
228228
image_caption: "Caption images"
229229

230+
translation:
231+
name: "Translation"
232+
description: "Translates content into supported languages"
233+
locale_detector: "Locale detector"
234+
post_raw_translator: "Post raw translator"
235+
topic_title_translator: "Topic title translator"
236+
short_text_translator: "Short text translator"
237+
238+
230239
modals:
231240
select_option: "Select an option..."
232241

config/locales/server.en.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,18 @@ en:
373373
image_captioner:
374374
name: "Image captions"
375375
description: "Default persona powering the Helper's image caption feature"
376+
locale_detector:
377+
name: "Locale detector"
378+
description: "Powers the translation feature by detecting the locale of a given text (posts, titles, etc.)"
379+
post_raw_translator:
380+
name: "Post translator"
381+
description: "Powers the translation feature by translating posts containing Discourse Markdown"
382+
topic_title_translator:
383+
name: "Topic title translator"
384+
description: "Powers the translation feature by translating topic titles"
385+
short_text_translator:
386+
name: "Short text translator"
387+
description: "Powers the translation feature by acting as a generic text translator, used for short texts like category names or tags"
376388

377389
topic_not_found: "Summary unavailable, topic not found!"
378390
summarizing: "Summarizing topic"

config/settings.yml

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -452,28 +452,54 @@ discourse_ai:
452452
default: false
453453
client: true
454454
validator: "DiscourseAi::Configuration::LlmDependencyValidator"
455+
area: "ai-features/translation"
455456
ai_translation_model:
456457
default: ""
457458
type: enum
458459
allow_any: false
459460
enum: "DiscourseAi::Configuration::LlmEnumerator"
460461
validator: "DiscourseAi::Configuration::LlmValidator"
462+
area: "ai-features/translation"
463+
ai_translation_locale_detector_persona:
464+
default: "-27"
465+
type: enum
466+
enum: "DiscourseAi::Configuration::PersonaEnumerator"
467+
area: "ai-features/translation"
468+
ai_translation_post_raw_translator_persona:
469+
default: "-28"
470+
type: enum
471+
enum: "DiscourseAi::Configuration::PersonaEnumerator"
472+
area: "ai-features/translation"
473+
ai_translation_topic_title_translator_persona:
474+
default: "-29"
475+
type: enum
476+
enum: "DiscourseAi::Configuration::PersonaEnumerator"
477+
area: "ai-features/translation"
478+
ai_translation_short_text_translator_persona:
479+
default: "-30"
480+
type: enum
481+
enum: "DiscourseAi::Configuration::PersonaEnumerator"
482+
area: "ai-features/translation"
461483
ai_translation_backfill_rate:
462484
default: 0
485+
min: 0
486+
max: 1000
463487
client: false
464488
hidden: true
489+
area: "ai-features/translation"
465490
ai_translation_backfill_limit_to_public_content:
466491
default: true
467492
client: false
468-
hidden: true
493+
area: "ai-features/translation"
469494
ai_translation_backfill_max_age_days:
470495
default: 5
471496
client: false
472-
hidden: true
497+
area: "ai-features/translation"
473498
ai_translation_verbose_logs:
474499
default: false
475500
client: false
476501
hidden: true
502+
area: "ai-features/translation"
477503

478504
inferred_concepts_enabled:
479505
default: false

lib/configuration/feature.rb

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,43 @@ def ai_helper_features
118118
]
119119
end
120120

121+
def translation_features
122+
feature_cache[:translation] ||= [
123+
new(
124+
"locale_detector",
125+
"ai_translation_locale_detector_persona",
126+
DiscourseAi::Configuration::Module::TRANSLATION_ID,
127+
DiscourseAi::Configuration::Module::TRANSLATION,
128+
),
129+
new(
130+
"post_raw_translator",
131+
"ai_translation_post_raw_translator_persona",
132+
DiscourseAi::Configuration::Module::TRANSLATION_ID,
133+
DiscourseAi::Configuration::Module::TRANSLATION,
134+
),
135+
new(
136+
"topic_title_translator",
137+
"ai_translation_topic_title_translator_persona",
138+
DiscourseAi::Configuration::Module::TRANSLATION_ID,
139+
DiscourseAi::Configuration::Module::TRANSLATION,
140+
),
141+
new(
142+
"short_text_translator",
143+
"ai_translation_short_text_translator_persona",
144+
DiscourseAi::Configuration::Module::TRANSLATION_ID,
145+
DiscourseAi::Configuration::Module::TRANSLATION,
146+
),
147+
]
148+
end
149+
121150
def all
122151
[
123152
summarization_features,
124153
search_features,
125154
discord_features,
126155
inference_features,
127156
ai_helper_features,
157+
translation_features,
128158
].flatten
129159
end
130160

lib/configuration/module.rb

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,16 @@ class Module
88
DISCORD = "discord"
99
INFERENCE = "inference"
1010
AI_HELPER = "ai_helper"
11+
TRANSLATION = "translation"
1112

12-
NAMES = [SUMMARIZATION, SEARCH, DISCORD, INFERENCE, AI_HELPER]
13+
NAMES = [SUMMARIZATION, SEARCH, DISCORD, INFERENCE, AI_HELPER, TRANSLATION]
1314

1415
SUMMARIZATION_ID = 1
1516
SEARCH_ID = 2
1617
DISCORD_ID = 3
1718
INFERENCE_ID = 4
1819
AI_HELPER_ID = 5
20+
TRANSLATION_ID = 6
1921

2022
class << self
2123
def all
@@ -50,6 +52,12 @@ def all
5052
"ai_helper_enabled",
5153
features: DiscourseAi::Configuration::Feature.ai_helper_features,
5254
),
55+
new(
56+
TRANSLATION_ID,
57+
TRANSLATION,
58+
"ai_translation_enabled",
59+
features: DiscourseAi::Configuration::Feature.translation_features,
60+
),
5361
]
5462
end
5563

lib/personas/bot_context.rb

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class BotContext
2525

2626
def initialize(
2727
post: nil,
28+
topic: nil,
2829
participants: nil,
2930
user: nil,
3031
skip_tool_details: nil,
@@ -70,7 +71,14 @@ def initialize(
7071
@topic_id = post.topic_id
7172
@private_message = post.topic.private_message?
7273
@participants ||= post.topic.allowed_users.map(&:username).join(", ") if @private_message
73-
@user = post.user
74+
@user ||= post.user
75+
end
76+
77+
if topic
78+
@topic_id ||= topic.id
79+
@private_message ||= topic.private_message?
80+
@participants ||= topic.allowed_users.map(&:username).join(", ") if @private_message
81+
@user ||= topic.user
7482
end
7583
end
7684

lib/personas/locale_detector.rb

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
4+
module Personas
5+
class LocaleDetector < Persona
6+
def self.default_enabled
7+
false
8+
end
9+
10+
def system_prompt
11+
<<~PROMPT.strip
12+
You will be given a piece of text, and your task is to detect the locale (language) of the text and return it in a specific JSON format.
13+
14+
To complete this task, follow these steps:
15+
16+
1. Carefully read and analyze the provided text.
17+
2. Determine the language of the text based on its characteristics, such as vocabulary, grammar, and sentence structure.
18+
3. Do not use links or programming code in the text to detect the locale
19+
4. Identify the appropriate language code for the detected language.
20+
21+
Here is a list of common language codes for reference:
22+
- English: en
23+
- Spanish: es
24+
- French: fr
25+
- German: de
26+
- Italian: it
27+
- Brazilian Portuguese: pt-BR
28+
- Russian: ru
29+
- Simplified Chinese: zh-CN
30+
- Japanese: ja
31+
- Korean: ko
32+
33+
If the language is not in this list, use the appropriate IETF language tag code.
34+
35+
5. Format your response as a JSON object with a single key "locale" and the value as the language code.
36+
37+
Your output should be in the following format:
38+
<output>
39+
{"locale": "xx"}
40+
</output>
41+
42+
Where "xx" is replaced by the appropriate language code.
43+
44+
Important: Base your analysis solely on the provided text. Do not use any external information or make assumptions about the text's origin or context beyond what is explicitly provided.
45+
PROMPT
46+
end
47+
48+
def response_format
49+
[{ "key" => "locale", "type" => "string" }]
50+
end
51+
52+
def temperature
53+
0
54+
end
55+
end
56+
end
57+
end

lib/personas/persona.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ def system_personas
6464
Tutor => -24,
6565
Translator => -25,
6666
ImageCaptioner => -26,
67+
LocaleDetector => -27,
68+
PostRawTranslator => -28,
69+
TopicTitleTranslator => -29,
70+
ShortTextTranslator => -30,
6771
}
6872
end
6973

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
4+
module Personas
5+
class PostRawTranslator < Persona
6+
def self.default_enabled
7+
false
8+
end
9+
10+
def system_prompt
11+
<<~PROMPT.strip
12+
You are a highly skilled translator tasked with translating content from one language to another. Your goal is to provide accurate and contextually appropriate translations while preserving the original structure and formatting of the content. Follow these instructions carefully:
13+
14+
Translation Instructions:
15+
1. Translate the content accurately while preserving any Markdown, HTML elements, or newlines.
16+
2. Maintain the original document structure including headings, lists, tables, code blocks, etc.
17+
3. Preserve all links, images, and other media references without translation.
18+
4. Handle code snippets appropriately:
19+
- Do not translate variable names, functions, or syntax within code blocks (```).
20+
- Translate comments within code blocks.
21+
5. For technical terminology:
22+
- Provide the accepted target language term if it exists.
23+
- If no equivalent exists, transliterate the term and include the original term in parentheses.
24+
6. For ambiguous terms or phrases, choose the most contextually appropriate translation.
25+
7. Do not add any content besides the translation.
26+
8. Ensure the translation only contains the original language and the target language.
27+
28+
The text to translate will be provided in JSON format with the following structure:
29+
{"content": "Text to translate", "target_locale": "Target language code"}
30+
31+
Output your translation in the following JSON format:
32+
{"translation": "Your translated text here"}
33+
34+
Here are three examples of correct translations:
35+
36+
Original: {"content":"New Update for Minecraft Adds Underwater Temples", "target_locale":"Spanish"}
37+
Correct translation: {"translation": "Nueva actualización para Minecraft añade templos submarinos"}
38+
39+
Original: {"content": "# Machine Learning 101\n\nMachine Learning (ML) is a subset of Artificial Intelligence (AI) that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience.\n\n## Key Concepts\n\n1. **Supervised Learning**: The algorithm learns from labeled training data.\n2. **Unsupervised Learning**: The algorithm finds patterns in unlabeled data.\n3. **Reinforcement Learning**: The algorithm learns through interaction with an environment.\n\n```python\n# Simple example of a machine learning model\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# Assuming X and y are your features and target variables\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Evaluate the model\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nFor more information, visit [Machine Learning on Wikipedia](https://en.wikipedia.org/wiki/Machine_learning).", "target_locale":"French"}
40+
Correct translation: {"translation": "# Machine Learning 101\n\nLe Machine Learning (ML) est un sous-ensemble de l'Intelligence Artificielle (IA) qui se concentre sur le développement d'algorithmes et de modèles statistiques permettant aux systèmes informatiques d'améliorer leurs performances sur une tâche spécifique grâce à l'expérience.\n\n## Concepts clés\n\n1. **Apprentissage supervisé** : L'algorithme apprend à partir de données d'entraînement étiquetées.\n2. **Apprentissage non supervisé** : L'algorithme trouve des motifs dans des données non étiquetées.\n3. **Apprentissage par renforcement** : L'algorithme apprend à travers l'interaction avec un environnement.\n\n```python\n# Exemple simple d'un modèle de machine learning\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# En supposant que X et y sont vos variables de caractéristiques et cibles\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Évaluer le modèle\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nPour plus d'informations, visitez [Machine Learning sur Wikipedia](https://en.wikipedia.org/wiki/Machine_learning)."}
41+
42+
Original: {"content": "**Heathrow fechado**: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de *Londres*", "target_locale": "English"}
43+
Correct translation: {"translation": "**Heathrow closed**: flight disruption expected to continue in coming days, says *London* airport management"}
44+
45+
Remember, you are being consumed via an API. Only return the translated text in the specified JSON format. Do not include any additional information or explanations in your response.
46+
PROMPT
47+
end
48+
49+
def response_format
50+
[{ "key" => "translation", "type" => "string" }]
51+
end
52+
53+
def temperature
54+
0.3
55+
end
56+
end
57+
end
58+
end
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
4+
module Personas
5+
class ShortTextTranslator < Persona
6+
def self.default_enabled
7+
false
8+
end
9+
10+
def system_prompt
11+
<<~PROMPT.strip
12+
You are a translation service specializing in translating short pieces of text or a few words.
13+
These words may be things like a name, description, or title. Adhere to the following guidelines:
14+
15+
1. Keep proper nouns and technical terms in their original language
16+
2. Keep the translated content close to the original length
17+
3. Translation maintains the original meaning
18+
4. Preserve any Markdown, HTML elements, links, parentheses, or newlines
19+
20+
The text to translate will be provided in JSON format with the following structure:
21+
{"content": "Text to translate", "target_locale": "Target language code"}
22+
23+
Provide your translation in the following JSON format:
24+
{"translation": "target_locale translation here"}
25+
26+
Here are three examples of correct translations:
27+
28+
Original: {"content":"Japan", "target_locale":"es"}
29+
Correct translation: {"translation": "Japón"}
30+
31+
Original: {"content":"Cats and Dogs", "target_locale":"zh_CN"}
32+
Correct translation: {"translation": "猫和狗"}
33+
34+
Original: {"content": "Q&A", "target_locale": "pt"}
35+
Correct translation: {"translation": "Perguntas e Respostas"}
36+
37+
Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the text now and provide your answer in the specified JSON format.
38+
PROMPT
39+
end
40+
41+
def response_format
42+
[{ "key" => "translation", "type" => "string" }]
43+
end
44+
45+
def temperature
46+
0.3
47+
end
48+
end
49+
end
50+
end

0 commit comments

Comments
 (0)