From ea153ae65fdb5720ccb3cdfc115fb3b5981cce8c Mon Sep 17 00:00:00 2001 From: Nat Date: Thu, 15 May 2025 16:54:38 +0800 Subject: [PATCH 1/2] DEV: Follow up after #294 --- app/jobs/regular/translate_translatable.rb | 25 -- .../automatic_translation_backfill.rb | 123 -------- .../automatic_translations.rb | 31 --- .../automatic_translation_backfill_spec.rb | 263 ------------------ .../jobs/detect_translatable_language_spec.rb | 6 +- spec/jobs/translate_translatable_spec.rb | 62 ----- spec/models/post_spec.rb | 36 --- 7 files changed, 5 insertions(+), 541 deletions(-) delete mode 100644 app/jobs/regular/translate_translatable.rb delete mode 100644 app/jobs/scheduled/automatic_translation_backfill.rb delete mode 100644 spec/jobs/automatic_translation_backfill_spec.rb delete mode 100644 spec/jobs/translate_translatable_spec.rb diff --git a/app/jobs/regular/translate_translatable.rb b/app/jobs/regular/translate_translatable.rb deleted file mode 100644 index 20db120f..00000000 --- a/app/jobs/regular/translate_translatable.rb +++ /dev/null @@ -1,25 +0,0 @@ -# frozen_string_literal: true - -module Jobs - class TranslateTranslatable < ::Jobs::Base - def execute(args) - return unless SiteSetting.translator_enabled - return if SiteSetting.automatic_translation_target_languages.blank? - - translatable = args[:type].constantize.find_by(id: args[:translatable_id]) - return if translatable.blank? - - target_locales = SiteSetting.automatic_translation_target_languages.split("|") - target_locales.each do |target_locale| - DiscourseTranslator::Provider::TranslatorProvider.get.translate( - translatable, - target_locale.to_sym, - ) - end - - topic_id, post_id = - translatable.is_a?(Post) ? [translatable.topic_id, translatable.id] : [translatable.id, 1] - MessageBus.publish("/topic/#{topic_id}", type: :translated_post, id: post_id) - end - end -end diff --git a/app/jobs/scheduled/automatic_translation_backfill.rb b/app/jobs/scheduled/automatic_translation_backfill.rb deleted file mode 100644 index 5ff1e351..00000000 --- a/app/jobs/scheduled/automatic_translation_backfill.rb +++ /dev/null @@ -1,123 +0,0 @@ -# frozen_string_literal: true - -module Jobs - class AutomaticTranslationBackfill < ::Jobs::Scheduled - every 5.minutes - cluster_concurrency 1 - - def execute(args = nil) - return unless SiteSetting.translator_enabled - return unless should_backfill? - - process_batch - end - - def fetch_untranslated_model_ids(model, content_column, limit, target_locale) - m = model.name.downcase - - DB.query_single(<<~SQL, target_locale: target_locale, limit: limit) - SELECT m.id - FROM #{model.table_name} m - #{limit_to_public_clause(model)} - WHERE m.deleted_at IS NULL - AND m.#{content_column} != '' - AND m.user_id > 0 - #{max_age_clause} - AND ( - m.id NOT IN ( - SELECT #{m}_id - FROM discourse_translator_#{m}_locales - WHERE detected_locale = :target_locale - ) - ) - AND ( - m.id NOT IN ( - SELECT #{m}_id - FROM discourse_translator_#{m}_translations - WHERE locale = :target_locale - ) - ) - ORDER BY m.updated_at DESC - LIMIT :limit - SQL - end - - private - - def should_backfill? - return false if SiteSetting.automatic_translation_target_languages.blank? - return false if SiteSetting.automatic_translation_backfill_rate == 0 - true - end - - def backfill_locales - @backfill_locales ||= - SiteSetting.automatic_translation_target_languages.split("|").map { |l| l.gsub("_", "-") } - end - - def translator - @translator_klass ||= DiscourseTranslator::Provider::TranslatorProvider.get - end - - def translate_records(type, record_ids, target_locale) - record_ids.each do |id| - record = type.find(id) - begin - translator.translate(record, target_locale.to_sym) - rescue => e - # continue with other locales even if one fails - Rails.logger.warn( - "Failed to machine-translate #{type.name}##{id} to #{target_locale}: #{e.message}\n#{e.backtrace.join("\n")}", - ) - next - end - end - end - - def process_batch - records_to_translate = SiteSetting.automatic_translation_backfill_rate - backfill_locales.each_with_index do |target_locale, i| - topic_ids = - fetch_untranslated_model_ids(Topic, "title", records_to_translate, target_locale) - post_ids = fetch_untranslated_model_ids(Post, "raw", records_to_translate, target_locale) - - next if topic_ids.empty? && post_ids.empty? - - DiscourseTranslator::VerboseLogger.log( - "Translating #{topic_ids.size} topics and #{post_ids.size} posts to #{target_locale}", - ) - - translate_records(Topic, topic_ids, target_locale) - translate_records(Post, post_ids, target_locale) - end - end - - def max_age_clause - return "" if SiteSetting.automatic_translation_backfill_max_age_days <= 0 - - "AND m.created_at > NOW() - INTERVAL '#{SiteSetting.automatic_translation_backfill_max_age_days} days'" - end - - def limit_to_public_clause(model) - return "" if !SiteSetting.automatic_translation_backfill_limit_to_public_content - - public_categories = Category.where(read_restricted: false).pluck(:id).join(", ") - if model == Post - limit_to_public_clause = <<~SQL - INNER JOIN topics t - ON m.topic_id = t.id - AND t.archetype = 'regular' - AND t.category_id IN (#{public_categories}) - SQL - elsif model == Topic - limit_to_public_clause = <<~SQL - INNER JOIN categories c - ON m.category_id = c.id - AND c.id IN (#{public_categories}) - SQL - end - - limit_to_public_clause - end - end -end diff --git a/lib/discourse_translator/automatic_translations.rb b/lib/discourse_translator/automatic_translations.rb index 00111584..43a5cebf 100644 --- a/lib/discourse_translator/automatic_translations.rb +++ b/lib/discourse_translator/automatic_translations.rb @@ -4,53 +4,22 @@ module DiscourseTranslator class AutomaticTranslations def inject(plugin) plugin.on(:post_process_cooked) do |_, post| - if translatable?(post) - Jobs.enqueue(:translate_translatable, type: "Post", translatable_id: post.id) - end - if SiteSetting.experimental_content_localization Jobs.enqueue(:detect_translate_post, post_id: post.id) end end plugin.on(:topic_created) do |topic| - if translatable?(topic) - Jobs.enqueue(:translate_translatable, type: "Topic", translatable_id: topic.id) - end - if SiteSetting.experimental_content_localization Jobs.enqueue(:detect_translate_topic, topic_id: topic.id) end end - plugin.on(:topic_edited) do |topic| - if translatable?(topic) - Jobs.enqueue(:translate_translatable, type: "Topic", translatable_id: topic.id) - end - end - plugin.on(:post_edited) do |post, topic_changed| if SiteSetting.experimental_content_localization && topic_changed Jobs.enqueue(:detect_translate_topic, topic_id: post.topic_id) end end end - - def translatable?(content) - return false if SiteSetting.automatic_translation_target_languages.blank? - return false if content.user_id <= 0 - return false if SiteSetting.automatic_translation_backfill_rate <= 0 - return true unless SiteSetting.automatic_translation_backfill_limit_to_public_content - - public_categories = Category.where(read_restricted: false).pluck(:id) - - if content.is_a?(Post) - public_categories.include?(content.topic.category_id) - elsif content.is_a?(Topic) - public_categories.include?(content.category_id) - else - false - end - end end end diff --git a/spec/jobs/automatic_translation_backfill_spec.rb b/spec/jobs/automatic_translation_backfill_spec.rb deleted file mode 100644 index af60596c..00000000 --- a/spec/jobs/automatic_translation_backfill_spec.rb +++ /dev/null @@ -1,263 +0,0 @@ -# frozen_string_literal: true - -describe Jobs::AutomaticTranslationBackfill do - before do - SiteSetting.translator_enabled = true - SiteSetting.translator_provider = "Google" - SiteSetting.translator_google_api_key = "api_key" - end - - def expect_google_check_language - Excon - .expects(:post) - .with(DiscourseTranslator::Provider::Google::SUPPORT_URI, anything, anything) - .returns( - Struct.new(:status, :body).new( - 200, - %{ { "data": { "languages": [ { "language": "es" }, { "language": "de" }] } } }, - ), - ) - .at_least_once - end - - def expect_google_detect(locale) - Excon - .expects(:post) - .with(DiscourseTranslator::Provider::Google::DETECT_URI, anything, anything) - .returns( - Struct.new(:status, :body).new( - 200, - %{ { "data": { "detections": [ [ { "language": "#{locale}" } ] ] } } }, - ), - ) - .once - end - - def expect_google_translate(text) - Excon - .expects(:post) - .with(DiscourseTranslator::Provider::Google::TRANSLATE_URI, body: anything, headers: anything) - .returns( - Struct.new(:status, :body).new( - 200, - %{ { "data": { "translations": [ { "translatedText": "#{text}" } ] } } }, - ), - ) - end - - describe "backfilling" do - it "does not backfill if translator is disabled" do - SiteSetting.translator_enabled = false - expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:process_batch) - described_class.new.execute - end - - it "does not backfill if backfill languages are not set" do - SiteSetting.automatic_translation_target_languages = "" - expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:process_batch) - described_class.new.execute - end - - it "does not backfill if backfill limit is set to 0" do - SiteSetting.automatic_translation_backfill_rate = 1 - SiteSetting.automatic_translation_target_languages = "de" - SiteSetting.automatic_translation_backfill_rate = 0 - expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:process_batch) - end - - describe "with two locales ['de', 'es']" do - before do - SiteSetting.automatic_translation_backfill_rate = 100 - SiteSetting.automatic_translation_target_languages = "de|es" - SiteSetting.automatic_translation_backfill_limit_to_public_content = true - SiteSetting.automatic_translation_backfill_max_age_days = 5 - expect_google_check_language - end - - it "backfills if topic is not in target languages" do - expect_google_detect("de") - expect_google_translate("hola") - topic = Fabricate(:topic) - - described_class.new.execute - - expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]]) - end - - it "backfills both topics and posts" do - post = Fabricate(:post) - topic = post.topic - - topic.set_detected_locale("de") - post.set_detected_locale("es") - - expect_google_translate("hola") - expect_google_translate("hallo") - - described_class.new.execute - - expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]]) - expect(post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]]) - end - - it "backfills only public content when limit_to_public_content is true" do - post = Fabricate(:post) - topic = post.topic - - private_category = Fabricate(:private_category, name: "Staff", group: Group[:staff]) - private_topic = Fabricate(:topic, category: private_category) - private_post = Fabricate(:post, topic: private_topic) - - topic.set_detected_locale("de") - post.set_detected_locale("es") - private_topic.set_detected_locale("de") - private_post.set_detected_locale("es") - - expect_google_translate("hola") - expect_google_translate("hallo") - - SiteSetting.automatic_translation_backfill_limit_to_public_content = true - described_class.new.execute - - expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]]) - expect(post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]]) - - expect(private_topic.translations).to eq([]) - expect(private_post.translations).to eq([]) - end - - it "translate only content newer than automatic_translation_backfill_max_age_days" do - old_post = Fabricate(:post) - old_topic = old_post.topic - new_post = Fabricate(:post) - new_topic = new_post.topic - - old_topic.set_detected_locale("de") - new_topic.set_detected_locale("de") - old_post.set_detected_locale("es") - new_post.set_detected_locale("es") - - old_topic.created_at = 5.days.ago - old_topic.save! - old_post.created_at = 5.days.ago - old_post.save! - new_topic.created_at = 1.days.ago - new_topic.save! - new_post.created_at = 1.days.ago - new_post.save! - - expect_google_translate("hola") - expect_google_translate("hallo") - - SiteSetting.automatic_translation_backfill_max_age_days = 3 - described_class.new.execute - - expect(old_post.translations).to eq([]) - expect(new_post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]]) - end - end - - describe "with just one locale ['de']" do - before do - SiteSetting.automatic_translation_backfill_rate = 100 - SiteSetting.automatic_translation_target_languages = "de" - expect_google_check_language - end - - it "backfills all (1) topics and (4) posts as it is within the maximum per job run" do - topic = Fabricate(:topic) - posts = Fabricate.times(4, :post, topic: topic) - - topic.set_detected_locale("es") - posts.each { |p| p.set_detected_locale("es") } - - expect_google_translate("hallo").times(5) - - described_class.new.execute - - expect(topic.translations.pluck(:locale, :translation)).to eq([%w[de hallo]]) - expect(posts.map { |p| p.translations.pluck(:locale, :translation).flatten }).to eq( - [%w[de hallo]] * 4, - ) - end - end - end - - describe ".fetch_untranslated_model_ids" do - fab!(:posts_1) { Fabricate.times(2, :post) } - fab!(:post_1) { Fabricate(:post) } - fab!(:post_2) { Fabricate(:post) } - fab!(:post_3) { Fabricate(:post) } - fab!(:posts_2) { Fabricate.times(2, :post) } - fab!(:post_4) { Fabricate(:post) } - fab!(:post_5) { Fabricate(:post) } - fab!(:post_6) { Fabricate(:post) } - fab!(:post_7) { Fabricate(:post) } - fab!(:posts_3) { Fabricate.times(2, :post) } - - before do -=begin -This is the scenario we are testing for: - | Post ID | detected_locale | translations | selected? | Why? | - |---------|-----------------|--------------|-----------|------| - | 1 | en | none | YES | source not de - | 2 | null | es | YES | missing de translation - | 3 | null | de | NO | has de translation - | 4 | de | es | NO | source is de and has es translation - | 5 | de | de | NO | both source and translation is de, missing es translation - | 6 | null | none | YES | no detected locale nor translation -=end - - [posts_1, posts_2, posts_3].flatten.each { |post| post.set_detected_locale("de") } - - post_1.set_detected_locale("en") - post_4.set_detected_locale("de") - post_5.set_detected_locale("de") - - post_2.set_translation("es", "hola") - post_3.set_translation("de", "hallo") - post_4.set_translation("es", "hola") - post_5.set_translation("de", "hallo") - end - - it "returns correct post ids needing translation in descending updated_at" do - # based on the table above, we will return post_6, post_2, post_1 - # but we will jumble its updated_at to test if it is sorted correctly - post_6.update!(updated_at: 1.day.ago) - post_1.update!(updated_at: 2.days.ago) - post_2.update!(updated_at: 3.days.ago) - - result = described_class.new.fetch_untranslated_model_ids(Post, "raw", 50, "de") - expect(result).to include(post_6.id, post_1.id, post_2.id) - end - - it "does not return posts that are deleted" do - post_1.trash! - result = described_class.new.fetch_untranslated_model_ids(Post, "raw", 50, "de") - expect(result).not_to include(post_1.id) - end - - it "does not return posts that are empty" do - post_1.raw = "" - post_1.save!(validate: false) - result = described_class.new.fetch_untranslated_model_ids(Post, "raw", 50, "de") - expect(result).not_to include(post_1.id) - end - - it "does not return posts by bots" do - post_1.update(user: Discourse.system_user) - - result = described_class.new.fetch_untranslated_model_ids(Post, "raw", 50, "de") - - expect(result).not_to include(post_1.id) - end - - it "does not return posts that already have the target locale translation, even if detected_locale is not the target" do - post_7.set_detected_locale("en") - post_7.set_translation("de", "hallo") - - result = described_class.new.fetch_untranslated_model_ids(Post, "raw", 50, "de") - expect(result).not_to include(post_7.id) - end - end -end diff --git a/spec/jobs/detect_translatable_language_spec.rb b/spec/jobs/detect_translatable_language_spec.rb index f3b99c22..a3a6d5a6 100644 --- a/spec/jobs/detect_translatable_language_spec.rb +++ b/spec/jobs/detect_translatable_language_spec.rb @@ -13,7 +13,11 @@ client = Aws::Translate::Client.new(stub_responses: true) client.stub_responses( :translate_text, - { translated_text: "大丈夫", source_language_code: "en", target_language_code: "jp" }, + { + translated_text: "translated text", + source_language_code: "en", + target_language_code: "jp", + }, ) Aws::Translate::Client.stubs(:new).returns(client) end diff --git a/spec/jobs/translate_translatable_spec.rb b/spec/jobs/translate_translatable_spec.rb deleted file mode 100644 index 7fdf9048..00000000 --- a/spec/jobs/translate_translatable_spec.rb +++ /dev/null @@ -1,62 +0,0 @@ -# frozen_string_literal: true - -describe Jobs::TranslateTranslatable do - fab!(:post) - fab!(:topic) - let!(:job) { Jobs::TranslateTranslatable.new } - - before do - SiteSetting.translator_enabled = true - SiteSetting.translator_provider = "Google" - SiteSetting.automatic_translation_backfill_rate = 100 - SiteSetting.automatic_translation_target_languages = "es|fr" - allow(DiscourseTranslator::Provider::Google).to receive(:translate) - end - - describe "#execute" do - it "does nothing when translator is disabled" do - SiteSetting.translator_enabled = false - - job.execute(type: "Post", translatable_id: post.id) - - expect(DiscourseTranslator::Provider::Google).not_to have_received(:translate) - end - - it "does nothing when target languages are empty" do - SiteSetting.automatic_translation_target_languages = "" - - job.execute(type: "Post", translatable_id: post.id) - - expect(DiscourseTranslator::Provider::Google).not_to have_received(:translate) - end - - it "translates posts to configured target languages" do - MessageBus - .expects(:publish) - .with("/topic/#{post.topic.id}", type: :translated_post, id: post.id) - .once - - job.execute(type: "Post", translatable_id: post.id) - - expect(DiscourseTranslator::Provider::Google).to have_received(:translate).with(post, :es) - expect(DiscourseTranslator::Provider::Google).to have_received(:translate).with(post, :fr) - end - - it "translates topics to configured target languages" do - MessageBus.expects(:publish).with("/topic/#{topic.id}", type: :translated_post, id: 1).once - - job.execute(type: "Topic", translatable_id: topic.id) - - expect(DiscourseTranslator::Provider::Google).to have_received(:translate).with(topic, :es) - expect(DiscourseTranslator::Provider::Google).to have_received(:translate).with(topic, :fr) - end - - it "does nothing when translatable is not found" do - MessageBus.expects(:publish).never - - job.execute(type: "Post", translatable_id: -1) - - expect(DiscourseTranslator::Provider::Google).not_to have_received(:translate) - end - end -end diff --git a/spec/models/post_spec.rb b/spec/models/post_spec.rb index 9e298b92..7d8b46cc 100644 --- a/spec/models/post_spec.rb +++ b/spec/models/post_spec.rb @@ -134,40 +134,4 @@ end end end - - describe "automatic translation job" do - fab!(:user) - - it "enqueues translate_translatable job when post cooked" do - SiteSetting.automatic_translation_backfill_rate = 100 - SiteSetting.automatic_translation_target_languages = "es" - post = Fabricate(:post, user: user) - CookedPostProcessor.new(post).post_process - - expect_job_enqueued( - job: :translate_translatable, - args: { - type: "Post", - translatable_id: post.id, - }, - ) - end - - it "does not enqueue translate_translatable job for bot posts" do - SiteSetting.automatic_translation_backfill_rate = 1 - SiteSetting.automatic_translation_target_languages = "es" - post = Fabricate(:post, user: Discourse.system_user) - CookedPostProcessor.new(post).post_process - - expect( - job_enqueued?( - job: :translate_translatable, - args: { - type: "Post", - translatable_id: post.id, - }, - ), - ).to eq false - end - end end From 18c454682f188062a4263bcd6602b0e0bac4803c Mon Sep 17 00:00:00 2001 From: Nat Date: Thu, 15 May 2025 20:42:52 +0800 Subject: [PATCH 2/2] Also prevent find errors by using find_by --- app/jobs/regular/detect_translate_post.rb | 2 +- app/jobs/regular/detect_translate_topic.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/jobs/regular/detect_translate_post.rb b/app/jobs/regular/detect_translate_post.rb index 7853e8e7..cf5cacf4 100644 --- a/app/jobs/regular/detect_translate_post.rb +++ b/app/jobs/regular/detect_translate_post.rb @@ -7,7 +7,7 @@ def execute(args) return unless SiteSetting.experimental_content_translation return if args[:post_id].blank? - post = Post.find(args[:post_id]) + post = Post.find_by(id: args[:post_id]) return if post.blank? || post.raw.blank? || post.deleted_at.present? || post.user_id <= 0 detected_locale = DiscourseTranslator::PostLocaleDetector.detect_locale(post) diff --git a/app/jobs/regular/detect_translate_topic.rb b/app/jobs/regular/detect_translate_topic.rb index b3c5d290..e4a4305b 100644 --- a/app/jobs/regular/detect_translate_topic.rb +++ b/app/jobs/regular/detect_translate_topic.rb @@ -7,7 +7,7 @@ def execute(args) return unless SiteSetting.experimental_content_translation return if args[:topic_id].blank? - topic = Topic.find(args[:topic_id]) + topic = Topic.find_by(id: args[:topic_id]) if topic.blank? || topic.title.blank? || topic.deleted_at.present? || topic.user_id <= 0 return end