diff --git a/.discourse-compatibility b/.discourse-compatibility
index 99519795..9b3f6f37 100644
--- a/.discourse-compatibility
+++ b/.discourse-compatibility
@@ -1,3 +1,4 @@
+< 3.5.0.beta4-dev: 14ca3c07efa0a80712a4cbb8ca455c32a727adec
 < 3.5.0.beta2-dev: 5f24835801fdc7cb98e1bcf42d2ab2e49e609921
 < 3.5.0.beta1-dev: 7d411e458bdd449f8aead2bc07cedeb00b856798
 < 3.4.0.beta3-dev: b4cf3a065884816fa3f770248c2bf908ba65d8ac
diff --git a/app/jobs/regular/detect_posts_locale.rb b/app/jobs/regular/detect_posts_locale.rb
new file mode 100644
index 00000000..835d13a8
--- /dev/null
+++ b/app/jobs/regular/detect_posts_locale.rb
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+module Jobs
+  # Job that detects and stores the locale of posts that have none.
+  # Each run processes at most BATCH_SIZE posts, most recently updated first.
+  class DetectPostsLocale < ::Jobs::Base
+    cluster_concurrency 1
+    sidekiq_options retry: false
+
+    BATCH_SIZE = 50
+
+    # @param args [Hash] job arguments; not read by this job
+    def execute(args)
+      return unless SiteSetting.translator_enabled
+      return unless SiteSetting.experimental_content_translation
+
+      posts =
+        Post
+          .where(locale: nil)
+          .where(deleted_at: nil)
+          .where("posts.user_id > 0")
+          .where.not(raw: [nil, ""])
+          .order(updated_at: :desc)
+          .limit(BATCH_SIZE)
+      return if posts.empty?
+
+      # Count successes only, so the summary below excludes posts whose
+      # detection raised.
+      detected = 0
+      posts.each do |post|
+        DiscourseTranslator::PostLocaleDetector.detect_locale(post)
+        detected += 1
+      rescue => e
+        Rails.logger.error(
+          "Discourse Translator: Failed to detect post #{post.id}'s locale: #{e.message}",
+        )
+      end
+
+      DiscourseTranslator::VerboseLogger.log("Detected #{detected} post locales")
+    end
+  end
+end
diff --git a/app/jobs/regular/translate_posts.rb b/app/jobs/regular/translate_posts.rb
new file mode 100644
index 00000000..9df90ce2
--- /dev/null
+++ b/app/jobs/regular/translate_posts.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module Jobs
+  # Job that translates posts into each configured target locale.
+  # For every locale, at most BATCH_SIZE posts lacking a localization in that
+  # locale are translated per run.
+  class TranslatePosts < ::Jobs::Base
+    cluster_concurrency 1
+    sidekiq_options retry: false
+
+    BATCH_SIZE = 50
+
+    # @param args [Hash] job arguments; not read by this job
+    def execute(args)
+      return unless SiteSetting.translator_enabled
+      return unless SiteSetting.experimental_content_translation
+
+      locales = SiteSetting.automatic_translation_target_languages.split("|")
+      return if locales.blank?
+
+      locales.each do |locale|
+        # LIMIT without ORDER BY returns arbitrary rows, so order the same way
+        # DetectPostsLocale does (most recently updated first).
+        posts =
+          Post
+            .joins(
+              "LEFT JOIN post_localizations pl ON pl.post_id = posts.id AND pl.locale = #{ActiveRecord::Base.connection.quote(locale)}",
+            )
+            .where(deleted_at: nil)
+            .where("posts.user_id > 0")
+            .where.not(raw: [nil, ""])
+            .where.not(locale: nil)
+            .where.not(locale: locale)
+            .where("pl.id IS NULL")
+            .order(updated_at: :desc)
+            .limit(BATCH_SIZE)
+
+        next if posts.empty?
+
+        # Count successes only, so failed posts are not reported as translated.
+        translated = 0
+        posts.each do |post|
+          DiscourseTranslator::PostTranslator.translate(post, locale)
+          translated += 1
+        rescue => e
+          Rails.logger.error(
+            "Discourse Translator: Failed to translate post #{post.id} to #{locale}: #{e.message}",
+          )
+        end
+
+        DiscourseTranslator::VerboseLogger.log("Translated #{translated} posts to #{locale}")
+      end
+    end
+  end
+end
diff --git a/app/services/discourse_translator/post_locale_detector.rb b/app/services/discourse_translator/post_locale_detector.rb
new file mode 100644
index 00000000..a4c348c9
--- /dev/null
+++ b/app/services/discourse_translator/post_locale_detector.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module DiscourseTranslator
+  # Detects a post's locale through the configured provider and stores it on the post.
+  class PostLocaleDetector
+    # @param post [Post] post whose locale should be detected
+    # @return [String, nil] the value returned by the provider's detect!, or nil when post is blank
+    def self.detect_locale(post)
+      return if post.blank?
+
+      translator = DiscourseTranslator::Provider::TranslatorProvider.get
+      detected_locale = translator.detect!(post)
+      # Do not persist a blank result: the jobs treat only a nil locale as
+      # "not detected yet", so "" would be picked up for translation.
+      post.update!(locale: detected_locale) if detected_locale.present?
+      detected_locale
+    end
+  end
+end
diff --git a/app/services/discourse_translator/post_translator.rb b/app/services/discourse_translator/post_translator.rb
new file mode 100644
index 00000000..baca32d7
--- /dev/null
+++ b/app/services/discourse_translator/post_translator.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module DiscourseTranslator
+  # Translates a post into a target locale and stores the result as a PostLocalization.
+  class PostTranslator
+    # @param post [Post] the post to translate
+    # @param target_locale [String, Symbol] "zh-CN" and "zh_CN" style are both accepted
+    # @return [PostLocalization, nil] the saved localization; nil when post or
+    #   target_locale is blank, or the post is already in the target locale
+    def self.translate(post, target_locale = I18n.locale)
+      return if post.blank? || target_locale.blank?
+
+      # Normalize both sides before comparing so "zh-CN" and "zh_CN" are
+      # recognized as the same locale and the post is not translated into itself.
+      normalized_locale = target_locale.to_s.sub("-", "_")
+      return if post.locale.to_s.sub("-", "_") == normalized_locale
+
+      translator = DiscourseTranslator::Provider::TranslatorProvider.get
+      translated_raw = translator.translate_post!(post, normalized_locale.to_sym)
+
+      localization =
+        PostLocalization.find_or_initialize_by(post_id: post.id, locale: normalized_locale)
+
+      localization.raw = translated_raw
+      localization.cooked = PrettyText.cook(translated_raw)
+      localization.post_version = post.version
+      localization.localizer_user_id = Discourse.system_user.id
+      localization.save!
+      localization
+    end
+  end
+end
diff --git a/app/services/discourse_translator/provider/base_provider.rb b/app/services/discourse_translator/provider/base_provider.rb
index de44cc79..7c3f2429 100644
--- a/app/services/discourse_translator/provider/base_provider.rb
+++ b/app/services/discourse_translator/provider/base_provider.rb
@@ -55,12 +55,7 @@ def self.translate(translatable, target_locale_sym = I18n.locale)
         [detected_lang, translated]
       end
 
-      # Subclasses must implement this method to translate the text of a
-      # post or topic and return only the translated text.
-      # Subclasses should use text_for_translation
-      # @param translatable [Post|Topic]
-      # @param target_locale_sym [Symbol]
-      # @return [String]
+      # TODO: Deprecate this in favour of translate_post! and translate_topic!
       def self.translate_translatable!(translatable, target_locale_sym = I18n.locale)
         raise "Not Implemented"
       end
@@ -69,6 +64,24 @@ def self.translate_text!(text, target_locale_sym = I18n.locale)
         raise "Not Implemented"
       end
 
+      # Translates a post and returns only the translated text.
+      # Delegates to translate_translatable! unless a provider overrides it.
+      # @param post [Post]
+      # @param target_locale_sym [Symbol]
+      # @return [String]
+      def self.translate_post!(post, target_locale_sym = I18n.locale)
+        translate_translatable!(post, target_locale_sym)
+      end
+
+      # Translates a topic and returns only the translated text.
+      # Delegates to translate_translatable! unless a provider overrides it.
+      # @param topic [Topic]
+      # @param target_locale_sym [Symbol]
+      # @return [String]
+      def self.translate_topic!(topic, target_locale_sym = I18n.locale)
+        translate_translatable!(topic, target_locale_sym)
+      end
+
       # Returns the stored detected locale of a post or topic.
       # If the locale does not exist yet, it will be detected first via the API then stored.
       # @param translatable [Post|Topic]
diff --git a/app/services/discourse_translator/provider/discourse_ai.rb b/app/services/discourse_translator/provider/discourse_ai.rb
index c93f0f2d..8db9248b 100644
--- a/app/services/discourse_translator/provider/discourse_ai.rb
+++ b/app/services/discourse_translator/provider/discourse_ai.rb
@@ -16,25 +16,43 @@ def self.detect!(topic_or_post)
       end
 
+      # Dispatches to translate_post! or translate_topic! based on the class
+      # name of translatable; returns nil for any other class.
       def self.translate_translatable!(translatable, target_locale_sym = I18n.locale)
+        case translatable.class.name
+        when "Post"
+          translate_post!(translatable, target_locale_sym)
+        when "Topic"
+          translate_topic!(translatable, target_locale_sym)
+        end
+      end
+
+      # Splits the post's raw text into chunks, translates each chunk and
+      # joins the results before normalizing them.
+      # @param post [Post]
+      # @param target_locale_sym [Symbol]
+      def self.translate_post!(post, target_locale_sym = I18n.locale)
         validate_required_settings!
 
-        language = get_language_name(target_locale_sym)
+        text = text_for_translation(post, raw: true)
+        chunks = DiscourseTranslator::ContentSplitter.split(text)
         translated =
-          case translatable.class.name
-          when "Post"
-            text = text_for_translation(translatable, raw: true)
-            chunks = DiscourseTranslator::ContentSplitter.split(text)
-            chunks
-              .map { |chunk| ::DiscourseAi::PostTranslator.new(chunk, target_locale_sym).translate }
-              .join("")
-          when "Topic"
-            ::DiscourseAi::TopicTranslator.new(
-              text_for_translation(translatable),
-              language,
-            ).translate
-          end
+          chunks
+            .map { |chunk| ::DiscourseAi::PostTranslator.new(chunk, target_locale_sym).translate }
+            .join("")
+        DiscourseTranslator::TranslatedContentNormalizer.normalize(post, translated)
+      end
 
-        DiscourseTranslator::TranslatedContentNormalizer.normalize(translatable, translated)
+      # Translates the topic's text in a single call, passing the language
+      # name looked up from target_locale_sym, then normalizes the result.
+      # @param topic [Topic]
+      # @param target_locale_sym [Symbol]
+      def self.translate_topic!(topic, target_locale_sym = I18n.locale)
+        validate_required_settings!
+
+        language = get_language_name(target_locale_sym)
+        translated =
+          ::DiscourseAi::TopicTranslator.new(text_for_translation(topic), language).translate
+        DiscourseTranslator::TranslatedContentNormalizer.normalize(topic, translated)
       end
 
       def self.translate_text!(text, target_locale_sym = I18n.locale)
diff --git a/spec/jobs/detect_posts_locale_spec.rb b/spec/jobs/detect_posts_locale_spec.rb
new file mode 100644
index 00000000..056cb2fd
--- /dev/null
+++ b/spec/jobs/detect_posts_locale_spec.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+describe Jobs::DetectPostsLocale do
+  fab!(:post) { Fabricate(:post, locale: nil) }
+  subject(:job) { described_class.new }
+
+  before do
+    SiteSetting.translator_enabled = true
+    SiteSetting.experimental_content_translation = true
+  end
+
+  it "does nothing when translator is disabled" do
+    SiteSetting.translator_enabled = false
+    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).never
+
+    job.execute({})
+  end
+
+  it "does nothing when content translation is disabled" do
+    SiteSetting.experimental_content_translation = false
+    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).never
+
+    job.execute({})
+  end
+
+  it "does nothing when there are no posts to detect" do
+    Post.update_all(locale: "en")
+    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).never
+
+    job.execute({})
+  end
+
+  it "detects locale for posts with nil locale" do
+    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).with(post).once
+    job.execute({})
+  end
+
+  it "detects most recently updated posts first" do
+    post_2 = Fabricate(:post, locale: nil)
+    post_3 = Fabricate(:post, locale: nil)
+
+    post.update!(updated_at: 3.days.ago)
+    post_2.update!(updated_at: 2.days.ago)
+    post_3.update!(updated_at: 4.days.ago)
+
+    original_batch = described_class::BATCH_SIZE
+    described_class.const_set(:BATCH_SIZE, 1)
+
+    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).with(post_2).once
+    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).with(post).never
+    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).with(post_3).never
+
+    job.execute({})
+  ensure
+    described_class.const_set(:BATCH_SIZE, original_batch)
+  end
+
+  it "skips bot posts" do
+    post.update!(user: Discourse.system_user)
+    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).with(post).never
+
+    job.execute({})
+  end
+
+  it "handles detection errors gracefully" do
+    DiscourseTranslator::PostLocaleDetector
+      .expects(:detect_locale)
+      .with(post)
+      .raises(StandardError.new("jiboomz"))
+      .once
+
+    expect { job.execute({}) }.not_to raise_error
+  end
+
+  it "logs a summary after running" do
+    DiscourseTranslator::PostLocaleDetector.stubs(:detect_locale)
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Detected 1 post locales"))
+
+    job.execute({})
+  end
+
+  it "does not count failed detections in the summary" do
+    DiscourseTranslator::PostLocaleDetector.stubs(:detect_locale).raises(
+      StandardError.new("jiboomz"),
+    )
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Detected 0 post locales"))
+
+    job.execute({})
+  end
+end
diff --git a/spec/jobs/translate_posts_spec.rb b/spec/jobs/translate_posts_spec.rb
new file mode 100644
index 00000000..0c30df02
--- /dev/null
+++ b/spec/jobs/translate_posts_spec.rb
@@ -0,0 +1,129 @@
+# frozen_string_literal: true
+
+describe Jobs::TranslatePosts do
+  fab!(:post)
+  subject(:job) { described_class.new }
+
+  let(:locales) { %w[en ja de] }
+
+  before do
+    SiteSetting.translator_enabled = true
+    SiteSetting.experimental_content_translation = true
+    SiteSetting.automatic_translation_backfill_rate = 1
+    SiteSetting.automatic_translation_target_languages = locales.join("|")
+  end
+
+  it "does nothing when translator is disabled" do
+    SiteSetting.translator_enabled = false
+    DiscourseTranslator::PostTranslator.expects(:translate).never
+
+    job.execute({})
+  end
+
+  it "does nothing when content translation is disabled" do
+    SiteSetting.experimental_content_translation = false
+    DiscourseTranslator::PostTranslator.expects(:translate).never
+
+    job.execute({})
+  end
+
+  it "does nothing when no target languages are configured" do
+    SiteSetting.automatic_translation_target_languages = ""
+    DiscourseTranslator::PostTranslator.expects(:translate).never
+
+    job.execute({})
+  end
+
+  it "does nothing when there are no posts to translate" do
+    Post.destroy_all
+    DiscourseTranslator::PostTranslator.expects(:translate).never
+
+    job.execute({})
+  end
+
+  it "skips posts that already have localizations" do
+    post.update!(locale: "es")
+    locales.each { |locale| Fabricate(:post_localization, post:, locale:) }
+    DiscourseTranslator::PostTranslator.expects(:translate).never
+
+    job.execute({})
+  end
+
+  it "skips bot posts" do
+    post.update!(user: Discourse.system_user, locale: "es")
+    DiscourseTranslator::PostTranslator.expects(:translate).never
+
+    job.execute({})
+  end
+
+  it "handles translation errors gracefully" do
+    post.update(locale: "es")
+    DiscourseTranslator::PostTranslator
+      .expects(:translate)
+      .with(post, "en")
+      .raises(StandardError.new("API error"))
+    DiscourseTranslator::PostTranslator.expects(:translate).with(post, "ja").once
+    DiscourseTranslator::PostTranslator.expects(:translate).with(post, "de").once
+
+    expect { job.execute({}) }.not_to raise_error
+  end
+
+  it "logs a summary after translation" do
+    post.update(locale: "es")
+    DiscourseTranslator::PostTranslator.stubs(:translate)
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Translated 1 posts to en"))
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Translated 1 posts to ja"))
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Translated 1 posts to de"))
+
+    job.execute({})
+  end
+
+  it "does not count failed translations in the summary" do
+    post.update(locale: "es")
+    DiscourseTranslator::PostTranslator.stubs(:translate).raises(StandardError.new("API error"))
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Translated 0 posts to en"))
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Translated 0 posts to ja"))
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Translated 0 posts to de"))
+
+    job.execute({})
+  end
+
+  context "for translation scenarios" do
+    it "scenario 1: skips post when locale is not set" do
+      DiscourseTranslator::PostTranslator.expects(:translate).never
+
+      job.execute({})
+    end
+
+    it "scenario 2: returns post with locale 'es' if localizations for en/ja/de do not exist" do
+      post = Fabricate(:post, locale: "es")
+
+      DiscourseTranslator::PostTranslator.expects(:translate).with(post, "en").once
+      DiscourseTranslator::PostTranslator.expects(:translate).with(post, "ja").once
+      DiscourseTranslator::PostTranslator.expects(:translate).with(post, "de").once
+
+      job.execute({})
+    end
+
+    it "scenario 3: returns post with locale 'en' if ja/de localization does not exist" do
+      post = Fabricate(:post, locale: "en")
+
+      DiscourseTranslator::PostTranslator.expects(:translate).with(post, "ja").once
+      DiscourseTranslator::PostTranslator.expects(:translate).with(post, "de").once
+      DiscourseTranslator::PostTranslator.expects(:translate).with(post, "en").never
+
+      job.execute({})
+    end
+
+    it "scenario 4: skips post with locale 'en' if 'ja' localization already exists" do
+      post = Fabricate(:post, locale: "en")
+      Fabricate(:post_localization, post: post, locale: "ja")
+
+      DiscourseTranslator::PostTranslator.expects(:translate).with(post, "en").never
+      DiscourseTranslator::PostTranslator.expects(:translate).with(post, "ja").never
+      DiscourseTranslator::PostTranslator.expects(:translate).with(post, "de").once
+
+      job.execute({})
+    end
+  end
+end
diff --git a/spec/services/post_locale_detector_spec.rb b/spec/services/post_locale_detector_spec.rb
new file mode 100644
index 00000000..daebab7b
--- /dev/null
+++ b/spec/services/post_locale_detector_spec.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+describe DiscourseTranslator::PostLocaleDetector do
+  describe ".detect_locale" do
+    fab!(:post) { Fabricate(:post, raw: "Hello world", locale: nil) }
+
+    let(:translator) { mock }
+
+    before { DiscourseTranslator::Provider::TranslatorProvider.stubs(:get).returns(translator) }
+
+    it "returns nil if post is blank" do
+      expect(described_class.detect_locale(nil)).to eq(nil)
+    end
+
+    it "calls detect! on the provider with the post" do
+      translator.expects(:detect!).with(post).returns("ja")
+      expect(described_class.detect_locale(post)).to eq("ja")
+    end
+
+    it "updates the post locale with the detected locale" do
+      translator.stubs(:detect!).with(post).returns("ja")
+      expect { described_class.detect_locale(post) }.to change { post.reload.locale }.from(nil).to(
+        "ja",
+      )
+    end
+
+    it "does not store a blank detected locale" do
+      translator.stubs(:detect!).with(post).returns("")
+      expect { described_class.detect_locale(post) }.not_to change { post.reload.locale }
+    end
+  end
+end
diff --git a/spec/services/post_translator_spec.rb b/spec/services/post_translator_spec.rb
new file mode 100644
index 00000000..88891b5e
--- /dev/null
+++ b/spec/services/post_translator_spec.rb
@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+describe DiscourseTranslator::PostTranslator do
+  describe ".translate" do
+    fab!(:post) { Fabricate(:post, raw: "Hello world", version: 1) }
+    let(:translator) { mock }
+    let(:translated_raw) { "こんにちは世界" }
+    let(:cooked) { "<p>こんにちは世界</p>" }
+    let(:target_locale) { "ja" }
+
+    before do
+      DiscourseTranslator::Provider::TranslatorProvider.stubs(:get).returns(translator)
+      translator.stubs(:translate_post!).with(post, :ja).returns(translated_raw)
+    end
+
+    it "returns nil if post is blank" do
+      expect(described_class.translate(nil, "ja")).to eq(nil)
+    end
+
+    it "returns nil if target_locale is blank" do
+      expect(described_class.translate(post, nil)).to eq(nil)
+      expect(described_class.translate(post, "")).to eq(nil)
+    end
+
+    it "returns nil if target_locale is same as post locale" do
+      post.locale = "en"
+
+      expect(described_class.translate(post, "en")).to eq(nil)
+    end
+
+    it "returns nil if target_locale matches post locale after normalization" do
+      post.locale = "zh_CN"
+
+      expect(described_class.translate(post, "zh-CN")).to eq(nil)
+    end
+
+    it "translates with post and locale" do
+      translator.expects(:translate_post!).with(post, :ja).returns(translated_raw)
+
+      described_class.translate(post, "ja")
+    end
+
+    it "normalizes dashes to underscores and symbol type for locale" do
+      translator.expects(:translate_post!).with(post, :zh_CN).returns("你好,世界")
+
+      described_class.translate(post, "zh-CN")
+    end
+
+    it "finds or creates a PostLocalization and sets its fields" do
+      expect {
+        res = described_class.translate(post, target_locale)
+        expect(res).to be_a(PostLocalization)
+        expect(res).to have_attributes(
+          post_id: post.id,
+          locale: target_locale,
+          raw: translated_raw,
+          cooked: cooked,
+          post_version: post.version,
+          localizer_user_id: Discourse.system_user.id,
+        )
+      }.to change { PostLocalization.count }.by(1)
+    end
+
+    it "updates an existing PostLocalization if present" do
+      localization =
+        Fabricate(:post_localization, post: post, locale: "ja", raw: "old", cooked: "old_cooked")
+      expect {
+        out = described_class.translate(post, "ja")
+        expect(out.id).to eq(localization.id)
+        expect(out.raw).to eq(translated_raw)
+        expect(out.cooked).to eq(cooked)
+      }.to_not change { PostLocalization.count }
+    end
+  end
+end