diff --git a/app/jobs/regular/translate_translatable.rb b/app/jobs/regular/translate_translatable.rb
new file mode 100644
index 00000000..aaaf1a26
--- /dev/null
+++ b/app/jobs/regular/translate_translatable.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Jobs
+  # Translates one Post or Topic into every locale listed in the
+  # `automatic_translation_target_languages` site setting, then publishes a
+  # `revised` message on the owning topic's MessageBus channel.
+  class TranslateTranslatable < ::Jobs::Base
+    # @param args [Hash] `:type` is the record's class name and `:translatable_id` its id
+    def execute(args)
+      return unless SiteSetting.translator_enabled
+      return if SiteSetting.automatic_translation_target_languages.blank?
+
+      translatable = args[:type].constantize.find_by(id: args[:translatable_id])
+      return if translatable.blank?
+
+      # The provider class is the same for every locale, so resolve it once.
+      translator = "DiscourseTranslator::#{SiteSetting.translator}".constantize
+      target_locales = SiteSetting.automatic_translation_target_languages.split("|")
+      target_locales.each do |target_locale|
+        translator.translate(translatable, target_locale.to_sym)
+      end
+
+      # Posts are announced under their own id; anything else is treated as a
+      # topic and announced with id 1.
+      topic_id, post_id =
+        translatable.is_a?(Post) ? [translatable.topic_id, translatable.id] : [translatable.id, 1]
+      MessageBus.publish("/topic/#{topic_id}", type: :revised, id: post_id)
+    end
+  end
+end
diff --git a/app/jobs/scheduled/automatic_translation_backfill.rb b/app/jobs/scheduled/automatic_translation_backfill.rb
new file mode 100644
index 00000000..cb412d60
--- /dev/null
+++ b/app/jobs/scheduled/automatic_translation_backfill.rb
@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+
+module Jobs
+  # Scheduled job that translates a rate-limited batch of topics and posts
+  # that do not yet cover every locale in `automatic_translation_target_languages`.
+  class AutomaticTranslationBackfill < ::Jobs::Scheduled
+    every 5.minutes
+
+    BACKFILL_LOCK_KEY = "discourse_translator_backfill_lock"
+
+    # Runs one batch unless the translator is disabled, backfilling is not
+    # configured, or the Redis lock is already held. The lock is deleted once
+    # the batch finishes, even if it raised.
+    def execute(args = nil)
+      return unless SiteSetting.translator_enabled
+      return unless should_backfill?
+      return unless secure_backfill_lock
+
+      begin
+        process_batch
+      ensure
+        Discourse.redis.del(BACKFILL_LOCK_KEY)
+      end
+    end
+
+    # Ids of `model` rows whose detected locale plus existing translation
+    # locales do not include every entry of `target_locales`, highest id first.
+    #
+    # @param model [Class] record class; its downcased name selects the tables queried
+    # @param limit [Integer] maximum number of ids to return
+    # @param target_locales [Array<String>] locales each record is expected to cover
+    # @return [Array] ids of the matching rows
+    def fetch_untranslated_model_ids(model = Post, limit = 100, target_locales = backfill_locales)
+      m = model.name.downcase
+      DB.query_single(<<~SQL, target_locales: target_locales, limit: limit)
+        SELECT m.id
+        FROM #{m}s m
+        LEFT JOIN discourse_translator_#{m}_locales dl ON dl.#{m}_id = m.id
+        LEFT JOIN LATERAL (
+          SELECT array_agg(DISTINCT locale)::text[] as locales
+          FROM discourse_translator_#{m}_translations dt
+          WHERE dt.#{m}_id = m.id
+        ) translations ON true
+        WHERE NOT (
+          ARRAY[:target_locales]::text[] <@
+            (COALESCE(
+              array_cat(
+                ARRAY[COALESCE(dl.detected_locale, '')]::text[],
+                COALESCE(translations.locales, ARRAY[]::text[])
+              ),
+              ARRAY[]::text[]
+            ))
+        )
+        ORDER BY m.id DESC
+        LIMIT :limit
+      SQL
+    end
+
+    private
+
+    # True when target languages are configured and the hourly budget is non-zero.
+    def should_backfill?
+      return false if SiteSetting.automatic_translation_target_languages.blank?
+      return false if SiteSetting.automatic_translation_backfill_maximum_translations_per_hour == 0
+      true
+    end
+
+    # Sets the lock key only if it is absent (nx), with a 5 minute expiry.
+    def secure_backfill_lock
+      Discourse.redis.set(BACKFILL_LOCK_KEY, "1", ex: 5.minutes.to_i, nx: true)
+    end
+
+    # Records to translate in one run: the hourly budget divided across the
+    # 12 five-minute runs in an hour and across the target locales, at least 1.
+    def translations_per_run
+      [
+        (SiteSetting.automatic_translation_backfill_maximum_translations_per_hour / 12) /
+          backfill_locales.size,
+        1,
+      ].max
+    end
+
+    def backfill_locales
+      @backfill_locales ||= SiteSetting.automatic_translation_target_languages.split("|")
+    end
+
+    def translator
+      @translator_klass ||= "DiscourseTranslator::#{SiteSetting.translator}".constantize
+    end
+
+    # Translates each record into every backfill locale. A record that no
+    # longer exists is skipped and a failed locale is logged, so one bad
+    # record cannot abort the rest of the batch.
+    def translate_records(type, record_ids)
+      record_ids.each do |id|
+        # The id was selected earlier in this run; the row may be gone by now.
+        record = type.find_by(id: id)
+        next unless record
+
+        backfill_locales.each do |target_locale|
+          begin
+            translator.translate(record, target_locale.to_sym)
+          rescue => e
+            # continue with other locales even if one fails
+            Rails.logger.warn(
+              "Failed to machine-translate #{type.name}##{id} to #{target_locale}: #{e.message}\n#{e.backtrace&.join("\n")}",
+            )
+          end
+        end
+      end
+    end
+
+    # Fetches topic ids first (up to half of the run's budget, at least one),
+    # gives posts whatever the topics left unused, then translates both.
+    def process_batch
+      budget = translations_per_run
+      models_translated = [Post, Topic].size
+      topic_ids = fetch_untranslated_model_ids(Topic, [budget / models_translated, 1].max)
+      post_ids = fetch_untranslated_model_ids(Post, budget - topic_ids.size)
+      return if topic_ids.empty? && post_ids.empty?
+
+      translate_records(Topic, topic_ids)
+      translate_records(Post, post_ids)
+    end
+  end
+end
diff --git a/app/services/discourse_translator/amazon.rb b/app/services/discourse_translator/amazon.rb
index 314ec9b2..3f3f3d95 100644
--- a/app/services/discourse_translator/amazon.rb
+++ b/app/services/discourse_translator/amazon.rb
@@ -126,7 +126,7 @@ def self.detect!(topic_or_post)
 
     def self.translate!(translatable, target_locale_sym = I18n.locale)
       detected_lang = detect(translatable)
-      save_translation(translatable) do
+      save_translation(translatable, target_locale_sym) do
         begin
           client.translate_text(
             {
diff --git a/app/services/discourse_translator/base.rb b/app/services/discourse_translator/base.rb
index 44909fd1..8a1eaff3 100644
--- a/app/services/discourse_translator/base.rb
+++ b/app/services/discourse_translator/base.rb
@@ -48,7 +48,7 @@ def self.translate(translatable, target_locale_sym = I18n.locale)
           ),
         )
       end
-      [detected_lang, translate!(translatable)]
+      [detected_lang, translate!(translatable, target_locale_sym)]
     end
 
     # Subclasses must implement this method to translate the text of a post or topic
@@ -77,9 +77,9 @@ def self.access_token
       raise "Not Implemented"
     end
 
-    def self.save_translation(translatable)
+    def self.save_translation(translatable, target_locale_sym = I18n.locale)
       translation = yield
-      translatable.set_translation(I18n.locale, translation)
+      translatable.set_translation(target_locale_sym, translation)
       translation
     end
 
diff --git a/app/services/discourse_translator/discourse_ai.rb b/app/services/discourse_translator/discourse_ai.rb
index b057e9bd..fc7de8bb 100644
--- a/app/services/discourse_translator/discourse_ai.rb
+++ b/app/services/discourse_translator/discourse_ai.rb
@@ -21,7 +21,7 @@ def self.detect!(topic_or_post)
 
     def self.translate!(translatable, target_locale_sym = I18n.locale)
       return unless required_settings_enabled
-      save_translation(translatable) do
+      save_translation(translatable, target_locale_sym) do
         ::DiscourseAi::Translator.new(
           text_for_translation(translatable),
           target_locale_sym,
diff --git a/app/services/discourse_translator/google.rb b/app/services/discourse_translator/google.rb
index 4d409484..19f2ad01 100644
--- a/app/services/discourse_translator/google.rb
+++ b/app/services/discourse_translator/google.rb
@@ -91,7 +91,7 @@ def self.translate_supported?(source, target)
 
     def self.translate!(translatable, target_locale_sym = I18n.locale)
       detected_locale = detect(translatable)
-      save_translation(translatable) do
+      save_translation(translatable, target_locale_sym) do
         res =
           result(
             TRANSLATE_URI,
diff --git a/app/services/discourse_translator/libre_translate.rb b/app/services/discourse_translator/libre_translate.rb
index 0e641f20..4ee81fa3 100644
--- a/app/services/discourse_translator/libre_translate.rb
+++ b/app/services/discourse_translator/libre_translate.rb
@@ -98,7 +98,7 @@ def self.translate_supported?(source, target)
 
     def self.translate!(translatable, target_locale_sym = I18n.locale)
       detected_lang = detect(translatable)
-      save_translation(translatable) do
+      save_translation(translatable, target_locale_sym) do
         res =
           result(
             translate_uri,
diff --git a/app/services/discourse_translator/microsoft.rb b/app/services/discourse_translator/microsoft.rb
index e70295bf..2f75f2a9 100644
--- a/app/services/discourse_translator/microsoft.rb
+++ b/app/services/discourse_translator/microsoft.rb
@@ -166,7 +166,7 @@ def self.translate!(translatable, target_locale_sym = I18n.locale)
       locale =
         SUPPORTED_LANG_MAPPING[target_locale_sym] || (raise I18n.t("translator.not_supported"))
 
-      save_translation(translatable) do
+      save_translation(translatable, target_locale_sym) do
         query = default_query.merge("from" => detected_lang, "to" => locale, "textType" => "html")
 
         body = [{ "Text" => text_for_translation(translatable) }].to_json
diff --git a/app/services/discourse_translator/yandex.rb b/app/services/discourse_translator/yandex.rb
index 2ea91b95..7cf7d650 100644
--- a/app/services/discourse_translator/yandex.rb
+++ b/app/services/discourse_translator/yandex.rb
@@ -137,7 +137,7 @@ def self.translate!(translatable, target_locale_sym = I18n.locale)
       locale =
         SUPPORTED_LANG_MAPPING[target_locale_sym] || (raise I18n.t("translator.not_supported"))
 
-      save_translation(translatable) do
+      save_translation(translatable, target_locale_sym) do
         query =
           default_query.merge(
             "lang" => "#{detected_lang}-#{locale}",
diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index e4230523..56cc7548 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -18,6 +18,7 @@ en:
     restrict_translation_by_group: "Only allowed groups can translate"
     restrict_translation_by_poster_group: "Only allow translation of posts made by users in allowed groups. If empty, allow translations of posts from all users."
     experimental_anon_language_switcher: "Enable experimental language switcher for anonymous users. This will allow anonymous users to switch between translated versions of Discourse and user-contributed content in topics."
+    automatic_translation_target_languages: "Translate posts to languages"
     errors:
       set_locale_cookie_requirements: "The experimental language switcher for anonymous users requires the `set locale from cookie` site setting to be enabled."
     experimental_topic_translation: "Enable experimental topic translation feature. This replaces existing post in-line translation with a button that allows users to translate the entire topic."
diff --git a/config/settings.yml b/config/settings.yml
index bb643193..33abd251 100644
--- a/config/settings.yml
+++ b/config/settings.yml
@@ -109,3 +109,13 @@ discourse_translator:
   experimental_topic_translation:
     default: false
     client: true
+  automatic_translation_target_languages:
+    default: ""
+    type: list
+    list_type: named
+    choices: "DiscourseTranslator::TranslatableLanguagesSetting.values"
+    allow_any: false
+  automatic_translation_backfill_maximum_translations_per_hour:
+    default: 0
+    client: false
+    hidden: true
diff --git a/lib/discourse_translator/translatable_languages_setting.rb b/lib/discourse_translator/translatable_languages_setting.rb
new file mode 100644
index 00000000..804a1b07
--- /dev/null
+++ b/lib/discourse_translator/translatable_languages_setting.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module DiscourseTranslator
+  # Choices provider for the `automatic_translation_target_languages` setting.
+  class TranslatableLanguagesSetting < LocaleSiteSetting
+    # @return [Array] the `:value` of every entry in `values`
+    def self.printable_values
+      values.map { |v| v[:value] }
+    end
+
+    # NOTE(review): presumably needed because the inherited class methods
+    # synchronise on a class-level @lock, which subclasses do not inherit; confirm.
+    @lock = Mutex.new
+  end
+end
diff --git a/plugin.rb b/plugin.rb
index 9fead2d9..4386a04c 100644
--- a/plugin.rb
+++ b/plugin.rb
@@ -38,6 +38,24 @@ module ::DiscourseTranslator
     end
   end
 
+  on(:post_process_cooked) do |_, post|
+    if SiteSetting.automatic_translation_target_languages.present?
+      Jobs.enqueue(:translate_translatable, type: "Post", translatable_id: post.id)
+    end
+  end
+
+  on(:topic_created) do |topic|
+    if SiteSetting.automatic_translation_target_languages.present?
+      Jobs.enqueue(:translate_translatable, type: "Topic", translatable_id: topic.id)
+    end
+  end
+
+  on(:topic_edited) do |topic|
+    if SiteSetting.automatic_translation_target_languages.present?
+      Jobs.enqueue(:translate_translatable, type: "Topic", translatable_id: topic.id)
+    end
+  end
+
   add_to_serializer :post, :can_translate do
     scope.can_translate?(object)
   end
diff --git a/spec/jobs/automatic_translation_backfill_spec.rb b/spec/jobs/automatic_translation_backfill_spec.rb
new file mode 100644
index 00000000..25ccbb77
--- /dev/null
+++ b/spec/jobs/automatic_translation_backfill_spec.rb
@@ -0,0 +1,188 @@
+# frozen_string_literal: true
+
+describe Jobs::AutomaticTranslationBackfill do
+  before do
+    SiteSetting.translator_enabled = true
+    SiteSetting.translator = "Google"
+    SiteSetting.translator_google_api_key = "api_key"
+  end
+
+  def expect_google_check_language
+    Excon
+      .expects(:post)
+      .with(DiscourseTranslator::Google::SUPPORT_URI, anything, anything)
+      .returns(
+        Struct.new(:status, :body).new(
+          200,
+          %{ { "data": { "languages": [ { "language": "es" }, { "language": "de" }] } } },
+        ),
+      )
+      .at_least_once
+  end
+
+  def expect_google_detect(locale)
+    Excon
+      .expects(:post)
+      .with(DiscourseTranslator::Google::DETECT_URI, anything, anything)
+      .returns(
+        Struct.new(:status, :body).new(
+          200,
+          %{ { "data": { "detections": [ [ { "language": "#{locale}" } ] ] } } },
+        ),
+      )
+      .once
+  end
+
+  def expect_google_translate(text)
+    Excon
+      .expects(:post)
+      .with(DiscourseTranslator::Google::TRANSLATE_URI, body: anything, headers: anything)
+      .returns(
+        Struct.new(:status, :body).new(
+          200,
+          %{ { "data": { "translations": [ { "translatedText": "#{text}" } ] } } },
+        ),
+      )
+  end
+
+  describe "backfilling" do
+    it "does not backfill if translator is disabled" do
+      SiteSetting.translator_enabled = false
+      expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:process_batch)
+      described_class.new.execute
+    end
+
+    it "does not backfill if backfill languages are not set" do
+      SiteSetting.automatic_translation_target_languages = ""
+      expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:process_batch)
+      described_class.new.execute
+    end
+
+    it "does not backfill if backfill limit is set to 0" do
+      SiteSetting.automatic_translation_target_languages = "de"
+      SiteSetting.automatic_translation_backfill_maximum_translations_per_hour = 0
+      expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:process_batch)
+      described_class.new.execute
+    end
+
+    it "does not backfill if backfill lock is not secure" do
+      SiteSetting.automatic_translation_target_languages = "de"
+      SiteSetting.automatic_translation_backfill_maximum_translations_per_hour = 1
+      Discourse.redis.set("discourse_translator_backfill_lock", "1")
+      expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:translate_records)
+      described_class.new.execute
+    end
+
+    describe "with two locales ['de', 'es']" do
+      before do
+        SiteSetting.automatic_translation_target_languages = "de|es"
+        SiteSetting.automatic_translation_backfill_maximum_translations_per_hour = 100
+        expect_google_check_language
+      end
+
+      it "backfills if topic is not in target languages" do
+        expect_google_detect("de")
+        expect_google_translate("hola")
+        topic = Fabricate(:topic)
+
+        described_class.new.execute
+
+        expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]])
+      end
+
+      it "backfills both topics and posts" do
+        post = Fabricate(:post)
+        topic = post.topic
+
+        topic.set_detected_locale("de")
+        post.set_detected_locale("es")
+
+        expect_google_translate("hallo")
+        expect_google_translate("hola")
+
+        described_class.new.execute
+
+        expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]])
+        expect(post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]])
+      end
+    end
+
+    describe "with just one locale ['de']" do
+      before do
+        SiteSetting.automatic_translation_target_languages = "de"
+        SiteSetting.automatic_translation_backfill_maximum_translations_per_hour = 5 * 12
+        expect_google_check_language
+      end
+
+      it "backfills all (1) topics and (4) posts as it is within the maximum per job run" do
+        topic = Fabricate(:topic)
+        posts = Fabricate.times(4, :post, topic: topic)
+
+        topic.set_detected_locale("es")
+        posts.each { |p| p.set_detected_locale("es") }
+
+        expect_google_translate("hallo").times(5)
+
+        described_class.new.execute
+
+        expect(topic.translations.pluck(:locale, :translation)).to eq([%w[de hallo]])
+        expect(posts.map { |p| p.translations.pluck(:locale, :translation).flatten }).to eq(
+          [%w[de hallo]] * 4,
+        )
+      end
+    end
+  end
+
+  describe ".fetch_untranslated_model_ids" do
+    fab!(:posts_1) { Fabricate.times(2, :post) }
+    fab!(:post_1) { Fabricate(:post) }
+    fab!(:post_2) { Fabricate(:post) }
+    fab!(:post_3) { Fabricate(:post) }
+    fab!(:posts_2) { Fabricate.times(2, :post) }
+    fab!(:post_4) { Fabricate(:post) }
+    fab!(:post_5) { Fabricate(:post) }
+    fab!(:post_6) { Fabricate(:post) }
+    fab!(:post_7) { Fabricate(:post) }
+    fab!(:posts_3) { Fabricate.times(2, :post) }
+
+    before do
+=begin
+This is the scenario we are testing for:
+  | Post ID | detected_locale | translations | selected? | Why? |
+  |---------|-----------------|--------------|-----------|------|
+  | 1       | en              | none         | YES       | source not de/es, needs both translations
+  | 2       | es              | none         | YES       | source is es, but missing de translation
+  | 3       | null            | es           | YES       | missing de translation
+  | 4       | null            | de, es       | NO        | has both de and es translations
+  | 5       | de              | es           | NO        | source is de and has es translation
+  | 6       | de              | de           | YES       | both source and translation is de, missing es translation
+  | 7       | de              | ja           | YES       | source is de, missing es translation
+=end
+
+      [posts_1, posts_2, posts_3].flatten.each do |post|
+        post.set_translation("es", "hola")
+        post.set_translation("de", "hallo")
+      end
+
+      post_1.set_detected_locale("en")
+      post_2.set_detected_locale("es")
+      post_5.set_detected_locale("de")
+      post_6.set_detected_locale("de")
+      post_7.set_detected_locale("de")
+
+      post_3.set_translation("es", "hola")
+      post_4.set_translation("de", "hallo")
+      post_4.set_translation("es", "hola")
+      post_5.set_translation("es", "hola")
+      post_6.set_translation("de", "hallo")
+      post_7.set_translation("ja", "こんにちは")
+    end
+
+    it "returns correct post ids needing translation in descending id" do
+      result = described_class.new.fetch_untranslated_model_ids(Post, 50, %w[de es])
+      expect(result).to include(post_7.id, post_6.id, post_3.id, post_2.id, post_1.id)
+      expect(result).not_to include(post_4.id, post_5.id)
+      expect(result).to eq(result.sort.reverse)
+    end
+  end
+end
diff --git a/spec/jobs/detect_posts_language_spec.rb b/spec/jobs/detect_posts_language_spec.rb
index 6dfd1f3c..5fc22451 100644
--- a/spec/jobs/detect_posts_language_spec.rb
+++ b/spec/jobs/detect_posts_language_spec.rb
@@ -15,7 +15,9 @@
       { translated_text: "大丈夫", source_language_code: "en", target_language_code: "jp" },
     )
     Aws::Translate::Client.stubs(:new).returns(client)
-    posts.each { |post| Discourse.redis.sadd?(redis_key, post.id) }
+    Discourse.redis.del(redis_key)
+    described_class.const_set(:MAX_QUEUE_SIZE, 100)
+    posts.each { |post| Discourse.redis.sadd(redis_key, post.id) }
   end
 
   it "processes posts in batches and updates their translations" do
@@ -37,17 +39,17 @@
       post.reload
       expect(post.detected_locale).to be_nil
     end
-    expect(Discourse.redis.smembers(redis_key)).to match_array(posts.map(&:id).map(&:to_s))
   end
 
   it "processes a maximum of MAX_QUEUE_SIZE posts per run" do
-    large_number = 2000
-    large_number.times { |i| Discourse.redis.sadd?(redis_key, i + 1) }
+    queue_size = 4
+    described_class.const_set(:MAX_QUEUE_SIZE, queue_size)
+
     described_class.new.execute({})
 
     remaining = Discourse.redis.scard(redis_key)
 
-    expect(remaining).to eq(large_number - Jobs::DetectPostsLanguage::MAX_QUEUE_SIZE)
+    expect(remaining).to eq(posts.size - queue_size)
   end
 
   it "handles an empty Redis queue gracefully" do
@@ -56,6 +58,8 @@
   end
 
   it "removes successfully processed posts from Redis" do
+    posts.each { |post| expect(Discourse.redis.sismember(redis_key, post.id)).to be_truthy }
+
     described_class.new.execute({})
 
     posts.each { |post| expect(Discourse.redis.sismember(redis_key, post.id)).to be_falsey }
@@ -75,8 +79,6 @@
 
     described_class.new.execute({})
 
-    posts.each do |post|
-      expect(DistributedMutex).to have_received(:synchronize).with("detect_translation_#{post.id}")
-    end
+    expect(DistributedMutex).to have_received(:synchronize).at_least(5)
   end
 end
diff --git a/spec/jobs/translate_translatable_spec.rb b/spec/jobs/translate_translatable_spec.rb
new file mode 100644
index 00000000..849262fa
--- /dev/null
+++ b/spec/jobs/translate_translatable_spec.rb
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+describe Jobs::TranslateTranslatable do
+  fab!(:post)
+  fab!(:topic)
+  let!(:job) { Jobs::TranslateTranslatable.new }
+
+  before do
+    SiteSetting.translator_enabled = true
+    SiteSetting.translator = "Google"
+    SiteSetting.automatic_translation_target_languages = "es|fr"
+    allow(DiscourseTranslator::Google).to receive(:translate)
+  end
+
+  describe "#execute" do
+    it "does nothing when translator is disabled" do
+      SiteSetting.translator_enabled = false
+
+      job.execute(type: "Post", translatable_id: post.id)
+
+      expect(DiscourseTranslator::Google).not_to have_received(:translate)
+    end
+
+    it "does nothing when target languages are empty" do
+      SiteSetting.automatic_translation_target_languages = ""
+
+      job.execute(type: "Post", translatable_id: post.id)
+
+      expect(DiscourseTranslator::Google).not_to have_received(:translate)
+    end
+
+    it "translates posts to configured target languages" do
+      MessageBus.expects(:publish).with("/topic/#{post.topic.id}", type: :revised, id: post.id).once
+
+      job.execute(type: "Post", translatable_id: post.id)
+
+      expect(DiscourseTranslator::Google).to have_received(:translate).with(post, :es)
+      expect(DiscourseTranslator::Google).to have_received(:translate).with(post, :fr)
+    end
+
+    it "translates topics to configured target languages" do
+      MessageBus.expects(:publish).with("/topic/#{topic.id}", type: :revised, id: 1).once
+
+      job.execute(type: "Topic", translatable_id: topic.id)
+
+      expect(DiscourseTranslator::Google).to have_received(:translate).with(topic, :es)
+      expect(DiscourseTranslator::Google).to have_received(:translate).with(topic, :fr)
+    end
+
+    it "does nothing when translatable is not found" do
+      MessageBus.expects(:publish).never
+
+      job.execute(type: "Post", translatable_id: -1)
+
+      expect(DiscourseTranslator::Google).not_to have_received(:translate)
+    end
+  end
+end