Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 32 additions & 12 deletions app/jobs/scheduled/automatic_translation_backfill.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,25 @@
module Jobs
class AutomaticTranslationBackfill < ::Jobs::Scheduled
every 5.minutes

BACKFILL_LOCK_KEY = "discourse_translator_backfill_lock"
cluster_concurrency 1

def execute(args = nil)
return unless SiteSetting.translator_enabled
return unless should_backfill?
return unless secure_backfill_lock

begin
process_batch
ensure
Discourse.redis.del(BACKFILL_LOCK_KEY)
end
process_batch
end

def fetch_untranslated_model_ids(model, content_column, limit, target_locale)
m = model.name.downcase
DB.query_single(<<~SQL, target_locale: target_locale, limit: limit)
SELECT m.id
FROM #{model.table_name} m
#{limit_to_public_clause(model)}
WHERE m.deleted_at IS NULL
AND m.#{content_column} != ''
AND m.user_id > 0
#{max_age_clause}
AND (
NOT EXISTS (
SELECT 1
Expand Down Expand Up @@ -58,10 +54,6 @@ def should_backfill?
true
end

def secure_backfill_lock
Discourse.redis.set(BACKFILL_LOCK_KEY, "1", ex: 5.minutes.to_i, nx: true)
end

def translations_per_run
[(SiteSetting.automatic_translation_backfill_maximum_translations_per_hour / 12), 1].max
end
Expand Down Expand Up @@ -118,5 +110,33 @@ def process_batch
translate_records(Post, post_ids, target_locale)
end
end

def max_age_clause
return "" if SiteSetting.automatic_translation_backfill_max_age_days <= 0

"AND m.created_at > NOW() - INTERVAL '#{SiteSetting.automatic_translation_backfill_max_age_days} days'"
end

def limit_to_public_clause(model)
return "" if !SiteSetting.automatic_translation_backfill_limit_to_public_content

public_categories = Category.where(read_restricted: false).pluck(:id).join(", ")
if model == Post
limit_to_public_clause = <<~SQL
INNER JOIN topics t
ON m.topic_id = t.id
AND t.archetype = 'regular'
AND t.category_id IN (#{public_categories})
SQL
elsif model == Topic
limit_to_public_clause = <<~SQL
INNER JOIN categories c
ON m.category_id = c.id
AND c.id IN (#{public_categories})
SQL
end

limit_to_public_clause
end
end
end
8 changes: 8 additions & 0 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ discourse_translator:
default: 0
client: false
hidden: true
automatic_translation_backfill_limit_to_public_content:
default: false
client: false
hidden: true
automatic_translation_backfill_max_age_days:
default: 0
client: false
hidden: true
translator_azure_subscription_key:
default: ''
translator_azure_region:
Expand Down
65 changes: 58 additions & 7 deletions spec/jobs/automatic_translation_backfill_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,12 @@ def expect_google_translate(text)
expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:process_batch)
end

it "does not backfill if backfill lock is not secure" do
SiteSetting.automatic_translation_backfill_maximum_translations_per_hour = 1
SiteSetting.automatic_translation_target_languages = "de"
Discourse.redis.set("discourse_translator_backfill_lock", "1")
expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:translate_records)
end

describe "with two locales ['de', 'es']" do
before do
SiteSetting.automatic_translation_backfill_maximum_translations_per_hour = 100
SiteSetting.automatic_translation_target_languages = "de|es"
SiteSetting.automatic_translation_backfill_limit_to_public_content = true
SiteSetting.automatic_translation_backfill_max_age_days = 5
expect_google_check_language
end

Expand Down Expand Up @@ -104,6 +99,62 @@ def expect_google_translate(text)
expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]])
expect(post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]])
end

it "backfills only public content when limit_to_public_content is true" do
post = Fabricate(:post)
topic = post.topic

private_category = Fabricate(:private_category, name: "Staff", group: Group[:staff])
private_topic = Fabricate(:topic, category: private_category)
private_post = Fabricate(:post, topic: private_topic)

topic.set_detected_locale("de")
post.set_detected_locale("es")
private_topic.set_detected_locale("de")
private_post.set_detected_locale("es")

expect_google_translate("hola")
expect_google_translate("hallo")

SiteSetting.automatic_translation_backfill_limit_to_public_content = true
described_class.new.execute

expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]])
expect(post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]])

expect(private_topic.translations).to eq([])
expect(private_post.translations).to eq([])
end

it "translate only content newer than automatic_translation_backfill_max_age_days" do
old_post = Fabricate(:post)
old_topic = old_post.topic
new_post = Fabricate(:post)
new_topic = new_post.topic

old_topic.set_detected_locale("de")
new_topic.set_detected_locale("de")
old_post.set_detected_locale("es")
new_post.set_detected_locale("es")

old_topic.created_at = 5.days.ago
old_topic.save!
old_post.created_at = 5.days.ago
old_post.save!
new_topic.created_at = 1.days.ago
new_topic.save!
new_post.created_at = 1.days.ago
new_post.save!

expect_google_translate("hola")
expect_google_translate("hallo")

SiteSetting.automatic_translation_backfill_max_age_days = 3
described_class.new.execute

expect(old_post.translations).to eq([])
expect(new_post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]])
end
end

describe "with just one locale ['de']" do
Expand Down
Loading