diff --git a/app/jobs/regular/detect_translate_topic.rb b/app/jobs/regular/detect_translate_topic.rb
new file mode 100644
index 00000000..a1c03cec
--- /dev/null
+++ b/app/jobs/regular/detect_translate_topic.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module Jobs
+  # Detects the locale of one topic, then translates it into each configured
+  # target language other than the detected one.
+  class DetectTranslateTopic < ::Jobs::Base
+    # @param args [Hash] job arguments; :topic_id selects the topic to process
+    def execute(args)
+      return unless SiteSetting.translator_enabled
+      return unless SiteSetting.experimental_content_translation
+      return if args[:topic_id].blank?
+
+      # find_by returns nil for a missing row, whereas find raises
+      # ActiveRecord::RecordNotFound and would make the blank? guard unreachable.
+      topic = Topic.find_by(id: args[:topic_id])
+      if topic.blank? || topic.title.blank? || topic.deleted_at.present? || topic.user_id <= 0
+        return
+      end
+
+      detected_locale = DiscourseTranslator::TopicLocaleDetector.detect_locale(topic)
+
+      locales = SiteSetting.automatic_translation_target_languages.split("|")
+      return if locales.blank?
+
+      locales.each do |locale|
+        next if locale == detected_locale
+
+        # A failure for one locale is logged and must not stop the others.
+        begin
+          DiscourseTranslator::TopicTranslator.translate(topic, locale)
+        rescue => e
+          Rails.logger.error(
+            "Discourse Translator: Failed to translate topic #{topic.id} to #{locale}: #{e.message}",
+          )
+        end
+      end
+    end
+  end
+end
diff --git a/app/jobs/regular/translate_topics.rb b/app/jobs/regular/translate_topics.rb
new file mode 100644
index 00000000..a2ba4033
--- /dev/null
+++ b/app/jobs/regular/translate_topics.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+module Jobs
+  # Translates, per configured target language, a batch of topics that do not
+  # yet have a localization row for that language.
+  class TranslateTopics < ::Jobs::Base
+    cluster_concurrency 1
+    sidekiq_options retry: false
+
+    # Per-locale batch size used when the job receives no :limit argument.
+    BATCH_SIZE = 50
+
+    # @param args [Hash] job arguments; optional :limit caps topics per locale
+    def execute(args)
+      return unless SiteSetting.translator_enabled
+      return unless SiteSetting.experimental_content_translation
+
+      locales = SiteSetting.automatic_translation_target_languages.split("|")
+      return if locales.blank?
+
+      limit = args[:limit] || BATCH_SIZE
+
+      locales.each do |locale|
+        # Non-deleted topics with a positive user_id whose known locale differs
+        # from the target locale and which have no localization for it.
+        topics =
+          Topic
+            .joins(
+              "LEFT JOIN topic_localizations tl ON tl.topic_id = topics.id AND tl.locale = #{ActiveRecord::Base.connection.quote(locale)}",
+            )
+            .where(deleted_at: nil)
+            .where("topics.user_id > 0")
+            .where.not(locale: nil)
+            .where.not(locale: locale)
+            .where("tl.id IS NULL")
+            .limit(limit)
+
+        next if topics.empty?
+
+        topics.each do |topic|
+          begin
+            DiscourseTranslator::TopicTranslator.translate(topic, locale)
+          rescue => e
+            Rails.logger.error(
+              "Discourse Translator: Failed to translate topic #{topic.id} to #{locale}: #{e.message}",
+            )
+          end
+        end
+
+        DiscourseTranslator::VerboseLogger.log("Translated #{topics.size} topics to #{locale}")
+      end
+    end
+  end
+end
diff --git a/app/jobs/scheduled/automatic_category_translation.rb b/app/jobs/scheduled/category_translation_backfill.rb
similarity index 87%
rename from app/jobs/scheduled/automatic_category_translation.rb
rename to app/jobs/scheduled/category_translation_backfill.rb
index c0841577..0463edfc 100644
--- a/app/jobs/scheduled/automatic_category_translation.rb
+++ b/app/jobs/scheduled/category_translation_backfill.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 module Jobs
-  class AutomaticCategoryTranslation < ::Jobs::Scheduled
+  class CategoryTranslationBackfill < ::Jobs::Scheduled
     every 12.hours
     cluster_concurrency 1
 
diff --git a/app/jobs/scheduled/topic_translation_backfill.rb b/app/jobs/scheduled/topic_translation_backfill.rb
new file mode 100644
index 00000000..0e5f3757
--- /dev/null
+++ b/app/jobs/scheduled/topic_translation_backfill.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+module Jobs
+  # Every 5 minutes, enqueues a :translate_topics job limited to the
+  # configured backfill rate.
+  class TopicTranslationBackfill < ::Jobs::Scheduled
+    every 5.minutes
+    cluster_concurrency 1
+
+    def execute(args)
+      return unless SiteSetting.translator_enabled
+      return unless SiteSetting.experimental_content_translation
+
+      return if SiteSetting.automatic_translation_target_languages.blank?
+      return if SiteSetting.automatic_translation_backfill_rate == 0
+
+      Jobs.enqueue(:translate_topics, limit: SiteSetting.automatic_translation_backfill_rate)
+    end
+  end
+end
diff --git a/app/jobs/scheduled/topics_locale_detection_backfill.rb b/app/jobs/scheduled/topics_locale_detection_backfill.rb
new file mode 100644
index 00000000..fb0d30b8
--- /dev/null
+++ b/app/jobs/scheduled/topics_locale_detection_backfill.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module Jobs
+  # Every 5 minutes, detects the locale of topics that have none yet, most
+  # recently updated first, up to the configured backfill rate.
+  class TopicsLocaleDetectionBackfill < ::Jobs::Scheduled
+    every 5.minutes
+    cluster_concurrency 1
+
+    def execute(args)
+      return unless SiteSetting.translator_enabled
+      return unless SiteSetting.experimental_content_translation
+      return if SiteSetting.automatic_translation_backfill_rate == 0
+
+      limit = SiteSetting.automatic_translation_backfill_rate
+      topics =
+        Topic
+          .where(locale: nil)
+          .where(deleted_at: nil)
+          .where("topics.user_id > 0")
+          .order(updated_at: :desc)
+          .limit(limit)
+      return if topics.empty?
+
+      topics.each do |topic|
+        begin
+          DiscourseTranslator::TopicLocaleDetector.detect_locale(topic)
+        rescue => e
+          Rails.logger.error(
+            "Discourse Translator: Failed to detect topic #{topic.id}'s locale: #{e.message}",
+          )
+        end
+      end
+
+      DiscourseTranslator::VerboseLogger.log("Detected #{topics.size} topic locales")
+    end
+  end
+end
diff --git a/app/services/discourse_translator/topic_locale_detector.rb b/app/services/discourse_translator/topic_locale_detector.rb
new file mode 100644
index 00000000..5be9d939
--- /dev/null
+++ b/app/services/discourse_translator/topic_locale_detector.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module DiscourseTranslator
+  # Detects a topic's locale via the configured provider and saves it.
+  class TopicLocaleDetector
+    # @param topic [Topic] topic to inspect
+    # @return [String, nil] normalized locale, or nil when topic is blank
+    def self.detect_locale(topic)
+      return if topic.blank?
+
+      translator = DiscourseTranslator::Provider::TranslatorProvider.get
+      detected_locale = translator.detect!(topic)
+      locale = LocaleNormalizer.normalize_to_i18n(detected_locale)
+      topic.update!(locale:)
+      locale
+    end
+  end
+end
diff --git a/app/services/discourse_translator/topic_translator.rb b/app/services/discourse_translator/topic_translator.rb
new file mode 100644
index 00000000..3df5f547
--- /dev/null
+++ b/app/services/discourse_translator/topic_translator.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module DiscourseTranslator
+  # Translates a topic title and stores it as a TopicLocalization record.
+  class TopicTranslator
+    # @param topic [Topic] topic whose title is translated
+    # @param target_locale [String, Symbol] locale to translate into
+    # @return [TopicLocalization, nil] the saved record, or nil when skipped
+    def self.translate(topic, target_locale = I18n.locale)
+      return if topic.blank? || target_locale.blank?
+
+      # Replace every dash (not only the first) and compare both sides in that
+      # form, so "zh-CN" is recognised as matching a topic stored as "zh_CN".
+      normalized_locale = target_locale.to_s.tr("-", "_")
+      return if topic.locale.to_s.tr("-", "_") == normalized_locale
+
+      target_locale_sym = normalized_locale.to_sym
+
+      translator = DiscourseTranslator::Provider::TranslatorProvider.get
+      translated_title = translator.translate_topic!(topic, target_locale_sym)
+
+      localization =
+        TopicLocalization.find_or_initialize_by(topic_id: topic.id, locale: normalized_locale)
+
+      localization.title = translated_title
+      localization.fancy_title = Topic.fancy_title(translated_title)
+      localization.localizer_user_id = Discourse.system_user.id
+      localization.save!
+      localization
+    end
+  end
+end
diff --git a/lib/discourse_translator/automatic_translations.rb b/lib/discourse_translator/automatic_translations.rb
index e5944dc5..00111584 100644
--- a/lib/discourse_translator/automatic_translations.rb
+++ b/lib/discourse_translator/automatic_translations.rb
@@ -17,6 +17,10 @@ def inject(plugin)
         if translatable?(topic)
           Jobs.enqueue(:translate_translatable, type: "Topic", translatable_id: topic.id)
         end
+
+        if SiteSetting.experimental_content_localization
+          Jobs.enqueue(:detect_translate_topic, topic_id: topic.id)
+        end
       end
 
       plugin.on(:topic_edited) do |topic|
@@ -24,6 +28,12 @@ def inject(plugin)
           Jobs.enqueue(:translate_translatable, type: "Topic", translatable_id: topic.id)
         end
       end
+
+      plugin.on(:post_edited) do |post, topic_changed|
+        if SiteSetting.experimental_content_localization && topic_changed
+          Jobs.enqueue(:detect_translate_topic, topic_id: post.topic_id)
+        end
+      end
     end
 
     def translatable?(content)
diff --git a/spec/jobs/detect_translate_topic_spec.rb b/spec/jobs/detect_translate_topic_spec.rb
new file mode 100644
index 00000000..d2830485
--- /dev/null
+++ b/spec/jobs/detect_translate_topic_spec.rb
@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+
+describe Jobs::DetectTranslateTopic do
+  fab!(:topic)
+  subject(:job) { described_class.new }
+
+  let(:locales) { %w[en ja] }
+
+  before do
+    SiteSetting.translator_enabled = true
+    SiteSetting.experimental_content_translation = true
+    SiteSetting.automatic_translation_backfill_rate = 1
+    SiteSetting.automatic_translation_target_languages = locales.join("|")
+  end
+
+  it "does nothing when translator is disabled" do
+    SiteSetting.translator_enabled = false
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).never
+    DiscourseTranslator::TopicTranslator.expects(:translate).never
+
+    job.execute({ topic_id: topic.id })
+  end
+
+  it "does nothing when content translation is disabled" do
+    SiteSetting.experimental_content_translation = false
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).never
+    DiscourseTranslator::TopicTranslator.expects(:translate).never
+
+    job.execute({ topic_id: topic.id })
+  end
+
+  it "detects locale" do
+    SiteSetting.translator_enabled = true
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).with(topic).once
+    DiscourseTranslator::TopicTranslator.expects(:translate).twice
+
+    job.execute({ topic_id: topic.id })
+  end
+
+  it "skips bot topics" do
+    topic.update!(user: Discourse.system_user)
+    DiscourseTranslator::TopicTranslator.expects(:translate).never
+
+    job.execute({ topic_id: topic.id })
+  end
+
+  it "does nothing when the topic does not exist" do
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).never
+    DiscourseTranslator::TopicTranslator.expects(:translate).never
+
+    expect { job.execute({ topic_id: -999 }) }.not_to raise_error
+  end
+
+  it "does not translate when no target languages are configured" do
+    SiteSetting.automatic_translation_target_languages = ""
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).with(topic).returns("en")
+    DiscourseTranslator::TopicTranslator.expects(:translate).never
+
+    job.execute({ topic_id: topic.id })
+  end
+
+  it "skips translating to the topic's language" do
+    topic.update(locale: "en")
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).with(topic).returns("en")
+    DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "en").never
+    DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "ja").once
+
+    job.execute({ topic_id: topic.id })
+  end
+
+  it "handles translation errors gracefully" do
+    topic.update(locale: "en")
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).with(topic).returns("en")
+    DiscourseTranslator::TopicTranslator.expects(:translate).raises(StandardError.new("API error"))
+
+    expect { job.execute({ topic_id: topic.id }) }.not_to raise_error
+  end
+end
diff --git a/spec/jobs/topics_locale_detection_backfill_spec.rb b/spec/jobs/topics_locale_detection_backfill_spec.rb
new file mode 100644
index 00000000..b0828d13
--- /dev/null
+++ b/spec/jobs/topics_locale_detection_backfill_spec.rb
@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+
+describe Jobs::TopicsLocaleDetectionBackfill do
+  fab!(:topic) { Fabricate(:topic, locale: nil) }
+  subject(:job) { described_class.new }
+
+  before do
+    SiteSetting.translator_enabled = true
+    SiteSetting.experimental_content_translation = true
+    SiteSetting.automatic_translation_backfill_rate = 100
+  end
+
+  it "does nothing when translator is disabled" do
+    SiteSetting.translator_enabled = false
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).never
+
+    job.execute({})
+  end
+
+  it "does nothing when content translation is disabled" do
+    SiteSetting.experimental_content_translation = false
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).never
+
+    job.execute({})
+  end
+
+  it "does nothing when there are no topics to detect" do
+    Topic.update_all(locale: "en")
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).never
+
+    job.execute({})
+  end
+
+  it "detects locale for topics with nil locale" do
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).with(topic).once
+    job.execute({})
+  end
+
+  it "detects most recently updated topics first" do
+    topic_2 = Fabricate(:topic, locale: nil)
+    topic_3 = Fabricate(:topic, locale: nil)
+
+    topic.update!(updated_at: 3.days.ago)
+    topic_2.update!(updated_at: 2.day.ago)
+    topic_3.update!(updated_at: 4.day.ago)
+
+    SiteSetting.automatic_translation_backfill_rate = 1
+
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).with(topic_2).once
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).with(topic).never
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).with(topic_3).never
+
+    job.execute({})
+  end
+
+  it "skips bot topics" do
+    topic.update!(user: Discourse.system_user)
+    DiscourseTranslator::TopicLocaleDetector.expects(:detect_locale).with(topic).never
+
+    job.execute({})
+  end
+
+  it "handles detection errors gracefully" do
+    DiscourseTranslator::TopicLocaleDetector
+      .expects(:detect_locale)
+      .with(topic)
+      .raises(StandardError.new("jiboomz"))
+      .once
+
+    expect { job.execute({}) }.not_to raise_error
+  end
+
+  it "logs a summary after running" do
+    DiscourseTranslator::TopicLocaleDetector.stubs(:detect_locale)
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Detected 1 topic locales"))
+
+    job.execute({})
+  end
+end
diff --git a/spec/jobs/translate_topics_spec.rb b/spec/jobs/translate_topics_spec.rb
new file mode 100644
index 00000000..318546b4
--- /dev/null
+++ b/spec/jobs/translate_topics_spec.rb
@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+
+describe Jobs::TranslateTopics do
+  fab!(:topic)
+  subject(:job) { described_class.new }
+
+  let(:locales) { %w[en ja de] }
+
+  before do
+    SiteSetting.translator_enabled = true
+    SiteSetting.experimental_content_translation = true
+    SiteSetting.automatic_translation_backfill_rate = 1
+    SiteSetting.automatic_translation_target_languages = locales.join("|")
+  end
+
+  it "does nothing when translator is disabled" do
+    SiteSetting.translator_enabled = false
+    DiscourseTranslator::TopicTranslator.expects(:translate).never
+
+    job.execute({})
+  end
+
+  it "does nothing when content translation is disabled" do
+    SiteSetting.experimental_content_translation = false
+    DiscourseTranslator::TopicTranslator.expects(:translate).never
+
+    job.execute({})
+  end
+
+  it "does nothing when no target languages are configured" do
+    SiteSetting.automatic_translation_target_languages = ""
+    DiscourseTranslator::TopicTranslator.expects(:translate).never
+
+    job.execute({})
+  end
+
+  it "does nothing when there are no topics to translate" do
+    Topic.destroy_all
+    DiscourseTranslator::TopicTranslator.expects(:translate).never
+
+    job.execute({})
+  end
+
+  it "skips topics that already have localizations" do
+    topic.update!(locale: "es")
+    locales.each { |locale| Fabricate(:topic_localization, topic:, locale:) }
+    DiscourseTranslator::TopicTranslator.expects(:translate).never
+
+    job.execute({})
+  end
+
+  it "skips bot topics" do
+    topic.update!(user: Discourse.system_user)
+    DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "en").never
+    DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "ja").never
+
+    job.execute({})
+  end
+
+  it "handles translation errors gracefully" do
+    topic.update(locale: "es")
+    DiscourseTranslator::TopicTranslator
+      .expects(:translate)
+      .with(topic, "en")
+      .raises(StandardError.new("API error"))
+    DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "ja").once
+    DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "de").once
+
+    expect { job.execute({}) }.not_to raise_error
+  end
+
+  it "logs a summary after translation" do
+    topic.update(locale: "es")
+    DiscourseTranslator::TopicTranslator.stubs(:translate)
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Translated 1 topics to en"))
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Translated 1 topics to ja"))
+    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Translated 1 topics to de"))
+
+    job.execute({})
+  end
+
+  context "for translation scenarios" do
+    it "scenario 1: skips topic when locale is not set" do
+      DiscourseTranslator::TopicTranslator.expects(:translate).never
+
+      job.execute({})
+    end
+
+    it "scenario 2: returns topic with locale 'es' if localizations for en/ja/de do not exist" do
+      topic = Fabricate(:topic, locale: "es")
+
+      DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "en").once
+      DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "ja").once
+      DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "de").once
+
+      job.execute({})
+    end
+
+    it "scenario 3: returns topic with locale 'en' if ja/de localization does not exist" do
+      topic = Fabricate(:topic, locale: "en")
+
+      DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "ja").once
+      DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "de").once
+      DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "en").never
+
+      job.execute({})
+    end
+
+    it "scenario 4: skips topic with locale 'en' if 'ja' localization already exists" do
+      topic = Fabricate(:topic, locale: "en")
+      Fabricate(:topic_localization, topic: topic, locale: "ja")
+
+      DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "en").never
+      DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "ja").never
+      DiscourseTranslator::TopicTranslator.expects(:translate).with(topic, "de").once
+
+      job.execute({})
+    end
+  end
+end
diff --git a/spec/lib/automatic_translation_spec.rb b/spec/lib/automatic_translation_spec.rb
index b7756835..0e97f7fe 100644
--- a/spec/lib/automatic_translation_spec.rb
+++ b/spec/lib/automatic_translation_spec.rb
@@ -20,4 +20,67 @@
       expect(job_enqueued?(job: :detect_translate_post, args: { post_id: post.id })).to eq false
     end
   end
+
+  describe "upon topic created" do
+    it "enqueues detect topic locale and translate topic job" do
+      SiteSetting.experimental_content_localization = true
+      topic =
+        PostCreator.create!(
+          Fabricate(:admin),
+          raw: "post",
+          title: "topic",
+          skip_validations: true,
+        ).topic
+
+      expect_job_enqueued(job: :detect_translate_topic, args: { topic_id: topic.id })
+    end
+
+    it "does not enqueue if setting disabled" do
+      SiteSetting.experimental_content_localization = false
+      topic =
+        PostCreator.create!(
+          Fabricate(:admin),
+          raw: "post",
+          title: "topic",
+          skip_validations: true,
+        ).topic
+
+      expect(job_enqueued?(job: :detect_translate_topic, args: { topic_id: topic.id })).to eq false
+    end
+  end
+
+  describe "upon first post (topic) edited" do
+    fab!(:post) { Fabricate(:post, post_number: 1) }
+    fab!(:non_first_post) { Fabricate(:post, post_number: 2) }
+
+    it "enqueues detect topic locale and translate topic job" do
+      SiteSetting.experimental_content_localization = true
+      topic = post.topic
+      revisor = PostRevisor.new(post, topic)
+      revisor.revise!(
+        post.user,
+        { title: "A whole new hole" },
+        { validate_post: false, bypass_bump: false },
+      )
+      revisor.post_process_post
+
+      expect_job_enqueued(job: :detect_translate_topic, args: { topic_id: topic.id })
+    end
+
+    it "does not enqueue if setting disabled" do
+      SiteSetting.experimental_content_localization = false
+      topic = post.topic
+      revisor = PostRevisor.new(post, topic)
+      revisor.revise!(
+        post.user,
+        { title: "A whole new hole" },
+        { validate_post: false, bypass_bump: false },
+      )
+      revisor.post_process_post
+
+      expect(
+        job_enqueued?(job: :detect_translate_topic, args: { topic_id: post.topic_id }),
+      ).to eq false
+    end
+  end
 end
diff --git a/spec/services/topic_locale_detector_spec.rb b/spec/services/topic_locale_detector_spec.rb
new file mode 100644
index 00000000..2dc868de
--- /dev/null
+++ b/spec/services/topic_locale_detector_spec.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+describe DiscourseTranslator::TopicLocaleDetector do
+  describe ".detect_locale" do
+    fab!(:topic) { Fabricate(:topic, title: "this is a cat topic", locale: nil) }
+
+    let(:translator) { mock }
+
+    before { DiscourseTranslator::Provider::TranslatorProvider.stubs(:get).returns(translator) }
+
+    it "returns nil if topic is blank" do
+      expect(described_class.detect_locale(nil)).to eq(nil)
+    end
+
+    it "calls detect! on the provider with the topic" do
+      translator.expects(:detect!).with(topic).returns("zh")
+      expect(described_class.detect_locale(topic)).to eq("zh_CN")
+    end
+
+    it "updates the topic locale with the detected locale" do
+      translator.stubs(:detect!).with(topic).returns("zh")
+      expect { described_class.detect_locale(topic) }.to change { topic.reload.locale }.from(
+        nil,
+      ).to("zh_CN")
+    end
+  end
+end
diff --git a/spec/services/topic_translator_spec.rb b/spec/services/topic_translator_spec.rb
new file mode 100644
index 00000000..3bae3e37
--- /dev/null
+++ b/spec/services/topic_translator_spec.rb
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+describe DiscourseTranslator::TopicTranslator do
+  describe ".translate" do
+    fab!(:topic) { Fabricate(:topic, title: "this is a cat topic :)") }
+    let(:translator) { mock }
+    let(:translated_title) { "これは猫の話題です :)" }
+    let(:fancy_title) { "これは猫の話題です :slight_smile:" }
+    let(:target_locale) { "ja" }
+
+    before do
+      DiscourseTranslator::Provider::TranslatorProvider.stubs(:get).returns(translator)
+      translator.stubs(:translate_topic!).with(topic, :ja).returns(translated_title)
+    end
+
+    it "returns nil if topic is blank" do
+      expect(described_class.translate(nil, "ja")).to eq(nil)
+    end
+
+    it "returns nil if target_locale is blank" do
+      expect(described_class.translate(topic, nil)).to eq(nil)
+      expect(described_class.translate(topic, "")).to eq(nil)
+    end
+
+    it "returns nil if target_locale is same as topic locale" do
+      topic.locale = "en"
+
+      expect(described_class.translate(topic, "en")).to eq(nil)
+    end
+
+    it "returns nil if target_locale matches topic locale apart from dash style" do
+      topic.locale = "zh_CN"
+
+      expect(described_class.translate(topic, "zh-CN")).to eq(nil)
+    end
+
+    it "translates with topic and locale" do
+      translator.expects(:translate_topic!).with(topic, :ja).returns(translated_title)
+
+      described_class.translate(topic, "ja")
+    end
+
+    it "normalizes dashes to underscores and symbol type for locale" do
+      translator.expects(:translate_topic!).with(topic, :zh_CN).returns("这是一个猫主题 :)")
+
+      described_class.translate(topic, "zh-CN")
+    end
+
+    it "finds or creates a TopicLocalization and sets its fields" do
+      expect {
+        res = described_class.translate(topic, target_locale)
+        expect(res).to be_a(TopicLocalization)
+        expect(res).to have_attributes(
+          topic_id: topic.id,
+          locale: target_locale,
+          title: translated_title,
+          fancy_title: fancy_title,
+          localizer_user_id: Discourse.system_user.id,
+        )
+      }.to change { TopicLocalization.count }.by(1)
+    end
+
+    it "updates an existing TopicLocalization if present" do
+      localization =
+        Fabricate(
+          :topic_localization,
+          topic:,
+          locale: "ja",
+          title: "old title",
+          fancy_title: "old_fancy_title",
+        )
+      expect {
+        expect(described_class.translate(topic, "ja")).to have_attributes(
+          id: localization.id,
+          title: translated_title,
+          fancy_title: fancy_title,
+        )
+        expect(localization.reload).to have_attributes(
+          title: translated_title,
+          fancy_title: fancy_title,
+        )
+      }.to_not change { TopicLocalization.count }
+    end
+  end
+end