diff --git a/app/services/discourse_ai/base_translator.rb b/app/services/discourse_ai/base_translator.rb new file mode 100644 index 00000000..da2ed24a --- /dev/null +++ b/app/services/discourse_ai/base_translator.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module DiscourseAi + class BaseTranslator + def initialize(text, target_language) + @text = text + @target_language = target_language + end + + def translate + prompt = + DiscourseAi::Completions::Prompt.new( + build_prompt(@target_language), + messages: [{ type: :user, content: "#{@text}", id: "user" }], + ) + + DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model).generate( + prompt, + user: Discourse.system_user, + feature_name: "translator-translate", + ) + end + + private + + def build_prompt(target_language) + prompt_template % { target_language: target_language } + end + + def prompt_template + raise NotImplementedError + end + end +end diff --git a/app/services/discourse_ai/post_translator.rb b/app/services/discourse_ai/post_translator.rb new file mode 100644 index 00000000..1e7adc03 --- /dev/null +++ b/app/services/discourse_ai/post_translator.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +module DiscourseAi + class PostTranslator < BaseTranslator + PROMPT_TEMPLATE = <<~TEXT.freeze + Translate this content to "%{target_language}". You must: + 1. Translate the content accurately while preserving any Markdown, HTML elements, or newlines + 2. Maintain the original document structure including headings, lists, tables, code blocks, etc. + 3. Preserve all links, images, and other media references without translation + 4. Handle code snippets appropriately - don't translate variable names, functions, or syntax within code blocks (```), but translate comments + 5. When encountering technical terminology, provide the accepted target language term if it exists, or transliterate if no equivalent exists, with the original term in parentheses + 6. For ambiguous terms or phrases, choose the most contextually appropriate translation + 7. Do not add any content besides the translation + 8. The translation must not have other languages other than the original and the target language + 9. You are being consumed via an API, only EVER return the translated text, do not return any other information + TEXT + + private def prompt_template + PROMPT_TEMPLATE + end + end +end diff --git a/app/services/discourse_ai/topic_translator.rb b/app/services/discourse_ai/topic_translator.rb new file mode 100644 index 00000000..1ff92f91 --- /dev/null +++ b/app/services/discourse_ai/topic_translator.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module DiscourseAi + class TopicTranslator < BaseTranslator + PROMPT_TEMPLATE = <<~TEXT.freeze + Translate this topic title to "%{target_language}" + - Keep the original language when it is a proper noun or technical term + - The translation should be around the same length as the original + TEXT + + private def prompt_template + PROMPT_TEMPLATE + end + end +end diff --git a/app/services/discourse_ai/translator.rb b/app/services/discourse_ai/translator.rb deleted file mode 100644 index d3f181ee..00000000 --- a/app/services/discourse_ai/translator.rb +++ /dev/null @@ -1,41 +0,0 @@ -# frozen_string_literal: true - -module DiscourseAi - class Translator - PROMPT_TEMPLATE = <<~TEXT.freeze - You are an expert translator specializing in converting Markdown content from any source language to target locale "%{target_language}". Your task is to: - 1. Translate the content accurately while preserving all Markdown formatting elements - 2. Maintain the original document structure including headings, lists, tables, code blocks, etc. - 3. Preserve all links, images, and other media references without translation - 4. Handle code snippets appropriately - don't translate variable names, functions, or syntax within code blocks (```), but translate comments - 5. When encountering technical terminology, provide the accepted target language term if it exists, or transliterate if no equivalent exists, with the original term in parentheses - 6. For ambiguous terms or phrases, choose the most contextually appropriate translation - 7. You are being consumed via an API, only EVER return the translated text, do not return any other information - TEXT - - def initialize(text, target_language) - @text = text - @target_language = target_language - end - - def translate - prompt = - DiscourseAi::Completions::Prompt.new( - build_prompt(@target_language), - messages: [{ type: :user, content: "#{@text}", id: "user" }], - ) - - DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model).generate( - prompt, - user: Discourse.system_user, - feature_name: "translator-translate", - ) - end - - private - - def build_prompt(target_language) - PROMPT_TEMPLATE % { target_language: target_language } - end - end -end diff --git a/app/services/discourse_translator/discourse_ai.rb b/app/services/discourse_translator/discourse_ai.rb index f9eb0a25..bad06938 100644 --- a/app/services/discourse_translator/discourse_ai.rb +++ b/app/services/discourse_translator/discourse_ai.rb @@ -33,11 +33,19 @@ def self.translate!(translatable, target_locale_sym = I18n.locale) ), ) end + + language = get_language_name(target_locale_sym) translated = - ::DiscourseAi::Translator.new( - text_for_translation(translatable, raw: true), - target_locale_sym, - ).translate + case translatable.class.name + when "Post" + ::DiscourseAi::PostTranslator.new( + text_for_translation(translatable, raw: true), + language, + ).translate + when "Topic" + ::DiscourseAi::TopicTranslator.new(text_for_translation(translatable), language).translate + end + DiscourseTranslator::TranslatedContentNormalizer.normalize(translatable, translated) end @@ -47,5 +55,10 @@ def self.required_settings_enabled SiteSetting.translator_enabled && SiteSetting.translator_provider == "DiscourseAi" && SiteSetting.discourse_ai_enabled && SiteSetting.ai_helper_enabled end + + def self.get_language_name(target_locale_sym) + LocaleSiteSetting.language_names.dig(target_locale_sym.to_s, "name") || + "locale \"#{target_locale_sym}\"" + end end end diff --git a/spec/services/discourse_ai/translator_spec.rb b/spec/services/discourse_ai/base_translator_spec.rb similarity index 75% rename from spec/services/discourse_ai/translator_spec.rb rename to spec/services/discourse_ai/base_translator_spec.rb index 7b9fc6e4..43178c15 100644 --- a/spec/services/discourse_ai/translator_spec.rb +++ b/spec/services/discourse_ai/base_translator_spec.rb @@ -2,7 +2,7 @@ require "rails_helper" -describe DiscourseAi::Translator do +describe DiscourseAi::BaseTranslator do before do Fabricate(:fake_model).tap do |fake_llm| SiteSetting.public_send("ai_helper_model=", "custom:#{fake_llm.id}") @@ -17,19 +17,21 @@ it "creates the correct prompt" do allow(DiscourseAi::Completions::Prompt).to receive(:new).with( <<~TEXT, - You are an expert translator specializing in converting Markdown content from any source language to target locale "de". Your task is to: - 1. Translate the content accurately while preserving all Markdown formatting elements + Translate this content to "de". You must: + 1. Translate the content accurately while preserving any Markdown, HTML elements, or newlines 2. Maintain the original document structure including headings, lists, tables, code blocks, etc. 3. Preserve all links, images, and other media references without translation 4. Handle code snippets appropriately - don't translate variable names, functions, or syntax within code blocks (```), but translate comments 5. When encountering technical terminology, provide the accepted target language term if it exists, or transliterate if no equivalent exists, with the original term in parentheses 6. For ambiguous terms or phrases, choose the most contextually appropriate translation - 7. You are being consumed via an API, only EVER return the translated text, do not return any other information + 7. Do not add any content besides the translation + 8. The translation must not have other languages other than the original and the target language + 9. You are being consumed via an API, only EVER return the translated text, do not return any other information TEXT messages: [{ type: :user, content: "cats are great", id: "user" }], ).and_call_original - described_class.new(text_to_translate, target_language).translate + DiscourseAi::PostTranslator.new(text_to_translate, target_language).translate end it "sends the translation prompt to the selected ai helper model" do @@ -46,13 +48,13 @@ feature_name: "translator-translate", ) - described_class.new(text_to_translate, target_language).translate + DiscourseAi::PostTranslator.new(text_to_translate, target_language).translate end it "returns the translation from the llm's response" do DiscourseAi::Completions::Llm.with_prepared_responses(["hur dur hur dur!"]) do expect( - described_class.new(text_to_translate, target_language).translate, + DiscourseAi::PostTranslator.new(text_to_translate, target_language).translate, ).to eq "hur dur hur dur!" end end diff --git a/spec/services/discourse_ai_spec.rb b/spec/services/discourse_ai_spec.rb index e0d67474..ebd6e371 100644 --- a/spec/services/discourse_ai_spec.rb +++ b/spec/services/discourse_ai_spec.rb @@ -1,9 +1,8 @@ # frozen_string_literal: true -require "rails_helper" - describe DiscourseTranslator::DiscourseAi do fab!(:post) + fab!(:topic) before do Fabricate(:fake_model).tap do |fake_llm| @@ -36,7 +35,10 @@ end describe ".translate" do - before { post.set_detected_locale("de") } + before do + post.set_detected_locale("de") + topic.set_detected_locale("de") + end it "translates the post and returns [locale, translated_text]" do DiscourseAi::Completions::Llm.with_prepared_responses(["some translated text"]) do @@ -45,5 +47,14 @@ expect(translated_text).to eq "
some translated text
" end end + + it "translates the topic" do + allow(::DiscourseAi::TopicTranslator).to receive(:new).and_call_original + DiscourseAi::Completions::Llm.with_prepared_responses(["some translated text"]) do + locale, translated_text = DiscourseTranslator::DiscourseAi.translate(topic) + expect(locale).to eq "de" + expect(translated_text).to eq "some translated text" + end + end end end