diff --git a/app/jobs/scheduled/automatic_translation_backfill.rb b/app/jobs/scheduled/automatic_translation_backfill.rb index bb85d315..12ca0093 100644 --- a/app/jobs/scheduled/automatic_translation_backfill.rb +++ b/app/jobs/scheduled/automatic_translation_backfill.rb @@ -3,19 +3,13 @@ module Jobs class AutomaticTranslationBackfill < ::Jobs::Scheduled every 5.minutes - - BACKFILL_LOCK_KEY = "discourse_translator_backfill_lock" + cluster_concurrency 1 def execute(args = nil) return unless SiteSetting.translator_enabled return unless should_backfill? - return unless secure_backfill_lock - begin - process_batch - ensure - Discourse.redis.del(BACKFILL_LOCK_KEY) - end + process_batch end def fetch_untranslated_model_ids(model, content_column, limit, target_locale) @@ -23,9 +17,11 @@ def fetch_untranslated_model_ids(model, content_column, limit, target_locale) DB.query_single(<<~SQL, target_locale: target_locale, limit: limit) SELECT m.id FROM #{model.table_name} m + #{limit_to_public_clause(model)} WHERE m.deleted_at IS NULL AND m.#{content_column} != '' AND m.user_id > 0 + #{max_age_clause} AND ( NOT EXISTS ( SELECT 1 @@ -58,10 +54,6 @@ def should_backfill? true end - def secure_backfill_lock - Discourse.redis.set(BACKFILL_LOCK_KEY, "1", ex: 5.minutes.to_i, nx: true) - end - def translations_per_run [(SiteSetting.automatic_translation_backfill_maximum_translations_per_hour / 12), 1].max end @@ -118,5 +110,33 @@ def process_batch translate_records(Post, post_ids, target_locale) end end + + def max_age_clause + return "" if SiteSetting.automatic_translation_backfill_max_age_days <= 0 + + "AND m.created_at > NOW() - INTERVAL '#{SiteSetting.automatic_translation_backfill_max_age_days} days'" + end + + def limit_to_public_clause(model) + return "" if !SiteSetting.automatic_translation_backfill_limit_to_public_content + + public_categories = Category.where(read_restricted: false).pluck(:id).join(", ") + if model == Post + limit_to_public_clause = <<~SQL + INNER JOIN topics t + ON m.topic_id = t.id + AND t.archetype = 'regular' + AND t.category_id IN (#{public_categories}) + SQL + elsif model == Topic + limit_to_public_clause = <<~SQL + INNER JOIN categories c + ON m.category_id = c.id + AND c.id IN (#{public_categories}) + SQL + end + + limit_to_public_clause + end end end diff --git a/config/settings.yml b/config/settings.yml index 3d0ac0ea..d67b06df 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -26,6 +26,14 @@ discourse_translator: default: 0 client: false hidden: true + automatic_translation_backfill_limit_to_public_content: + default: false + client: false + hidden: true + automatic_translation_backfill_max_age_days: + default: 0 + client: false + hidden: true translator_azure_subscription_key: default: '' translator_azure_region: diff --git a/spec/jobs/automatic_translation_backfill_spec.rb b/spec/jobs/automatic_translation_backfill_spec.rb index 4298f4fd..7ed90e18 100644 --- a/spec/jobs/automatic_translation_backfill_spec.rb +++ b/spec/jobs/automatic_translation_backfill_spec.rb @@ -65,17 +65,12 @@ def expect_google_translate(text) expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:process_batch) end - it "does not backfill if backfill lock is not secure" do - SiteSetting.automatic_translation_backfill_maximum_translations_per_hour = 1 - SiteSetting.automatic_translation_target_languages = "de" - Discourse.redis.set("discourse_translator_backfill_lock", "1") - expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:translate_records) - end - describe "with two locales ['de', 'es']" do before do SiteSetting.automatic_translation_backfill_maximum_translations_per_hour = 100 SiteSetting.automatic_translation_target_languages = "de|es" + SiteSetting.automatic_translation_backfill_limit_to_public_content = true + SiteSetting.automatic_translation_backfill_max_age_days = 5 expect_google_check_language end @@ -104,6 +99,62 @@ def expect_google_translate(text) expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]]) expect(post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]]) end + + it "backfills only public content when limit_to_public_content is true" do + post = Fabricate(:post) + topic = post.topic + + private_category = Fabricate(:private_category, name: "Staff", group: Group[:staff]) + private_topic = Fabricate(:topic, category: private_category) + private_post = Fabricate(:post, topic: private_topic) + + topic.set_detected_locale("de") + post.set_detected_locale("es") + private_topic.set_detected_locale("de") + private_post.set_detected_locale("es") + + expect_google_translate("hola") + expect_google_translate("hallo") + + SiteSetting.automatic_translation_backfill_limit_to_public_content = true + described_class.new.execute + + expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]]) + expect(post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]]) + + expect(private_topic.translations).to eq([]) + expect(private_post.translations).to eq([]) + end + + it "translate only content newer than automatic_translation_backfill_max_age_days" do + old_post = Fabricate(:post) + old_topic = old_post.topic + new_post = Fabricate(:post) + new_topic = new_post.topic + + old_topic.set_detected_locale("de") + new_topic.set_detected_locale("de") + old_post.set_detected_locale("es") + new_post.set_detected_locale("es") + + old_topic.created_at = 5.days.ago + old_topic.save! + old_post.created_at = 5.days.ago + old_post.save! + new_topic.created_at = 1.days.ago + new_topic.save! + new_post.created_at = 1.days.ago + new_post.save! + + expect_google_translate("hola") + expect_google_translate("hallo") + + SiteSetting.automatic_translation_backfill_max_age_days = 3 + described_class.new.execute + + expect(old_post.translations).to eq([]) + expect(new_post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]]) + end end describe "with just one locale ['de']" do