Skip to content

Commit 0c71b6e

Browse files
authored
FEATURE: Add hidden settings to limit automatic translation scope (#255)
* DEV: Remove manual lock in favor of cluster_concurrency This is a Discourse native feature that guarantees that we won't run multiples of the same job at the same time. * FEATURE: Add hidden settings to limit automatic translation scope automatic_translation_backfill_max_age_days controls max age for something to be eligible for translation automatic_translation_backfill_limit_to_public_content limits the automatic translation to posts and topics in categories who are public.
1 parent e4384b4 commit 0c71b6e

File tree

3 files changed

+98
-19
lines changed

3 files changed

+98
-19
lines changed

app/jobs/scheduled/automatic_translation_backfill.rb

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,25 @@
33
module Jobs
44
class AutomaticTranslationBackfill < ::Jobs::Scheduled
55
every 5.minutes
6-
7-
BACKFILL_LOCK_KEY = "discourse_translator_backfill_lock"
6+
cluster_concurrency 1
87

98
def execute(args = nil)
109
return unless SiteSetting.translator_enabled
1110
return unless should_backfill?
12-
return unless secure_backfill_lock
1311

14-
begin
15-
process_batch
16-
ensure
17-
Discourse.redis.del(BACKFILL_LOCK_KEY)
18-
end
12+
process_batch
1913
end
2014

2115
def fetch_untranslated_model_ids(model, content_column, limit, target_locale)
2216
m = model.name.downcase
2317
DB.query_single(<<~SQL, target_locale: target_locale, limit: limit)
2418
SELECT m.id
2519
FROM #{model.table_name} m
20+
#{limit_to_public_clause(model)}
2621
WHERE m.deleted_at IS NULL
2722
AND m.#{content_column} != ''
2823
AND m.user_id > 0
24+
#{max_age_clause}
2925
AND (
3026
NOT EXISTS (
3127
SELECT 1
@@ -58,10 +54,6 @@ def should_backfill?
5854
true
5955
end
6056

61-
def secure_backfill_lock
62-
Discourse.redis.set(BACKFILL_LOCK_KEY, "1", ex: 5.minutes.to_i, nx: true)
63-
end
64-
6557
def translations_per_run
6658
[(SiteSetting.automatic_translation_backfill_maximum_translations_per_hour / 12), 1].max
6759
end
@@ -118,5 +110,33 @@ def process_batch
118110
translate_records(Post, post_ids, target_locale)
119111
end
120112
end
113+
114+
def max_age_clause
115+
return "" if SiteSetting.automatic_translation_backfill_max_age_days <= 0
116+
117+
"AND m.created_at > NOW() - INTERVAL '#{SiteSetting.automatic_translation_backfill_max_age_days} days'"
118+
end
119+
120+
def limit_to_public_clause(model)
121+
return "" if !SiteSetting.automatic_translation_backfill_limit_to_public_content
122+
123+
public_categories = Category.where(read_restricted: false).pluck(:id).join(", ")
124+
if model == Post
125+
limit_to_public_clause = <<~SQL
126+
INNER JOIN topics t
127+
ON m.topic_id = t.id
128+
AND t.archetype = 'regular'
129+
AND t.category_id IN (#{public_categories})
130+
SQL
131+
elsif model == Topic
132+
limit_to_public_clause = <<~SQL
133+
INNER JOIN categories c
134+
ON m.category_id = c.id
135+
AND c.id IN (#{public_categories})
136+
SQL
137+
end
138+
139+
limit_to_public_clause
140+
end
121141
end
122142
end

config/settings.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@ discourse_translator:
2626
default: 0
2727
client: false
2828
hidden: true
29+
automatic_translation_backfill_limit_to_public_content:
30+
default: false
31+
client: false
32+
hidden: true
33+
automatic_translation_backfill_max_age_days:
34+
default: 0
35+
client: false
36+
hidden: true
2937
translator_azure_subscription_key:
3038
default: ''
3139
translator_azure_region:

spec/jobs/automatic_translation_backfill_spec.rb

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,17 +65,12 @@ def expect_google_translate(text)
6565
expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:process_batch)
6666
end
6767

68-
it "does not backfill if backfill lock is not secure" do
69-
SiteSetting.automatic_translation_backfill_maximum_translations_per_hour = 1
70-
SiteSetting.automatic_translation_target_languages = "de"
71-
Discourse.redis.set("discourse_translator_backfill_lock", "1")
72-
expect_any_instance_of(Jobs::AutomaticTranslationBackfill).not_to receive(:translate_records)
73-
end
74-
7568
describe "with two locales ['de', 'es']" do
7669
before do
7770
SiteSetting.automatic_translation_backfill_maximum_translations_per_hour = 100
7871
SiteSetting.automatic_translation_target_languages = "de|es"
72+
SiteSetting.automatic_translation_backfill_limit_to_public_content = true
73+
SiteSetting.automatic_translation_backfill_max_age_days = 5
7974
expect_google_check_language
8075
end
8176

@@ -104,6 +99,62 @@ def expect_google_translate(text)
10499
expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]])
105100
expect(post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]])
106101
end
102+
103+
it "backfills only public content when limit_to_public_content is true" do
104+
post = Fabricate(:post)
105+
topic = post.topic
106+
107+
private_category = Fabricate(:private_category, name: "Staff", group: Group[:staff])
108+
private_topic = Fabricate(:topic, category: private_category)
109+
private_post = Fabricate(:post, topic: private_topic)
110+
111+
topic.set_detected_locale("de")
112+
post.set_detected_locale("es")
113+
private_topic.set_detected_locale("de")
114+
private_post.set_detected_locale("es")
115+
116+
expect_google_translate("hola")
117+
expect_google_translate("hallo")
118+
119+
SiteSetting.automatic_translation_backfill_limit_to_public_content = true
120+
described_class.new.execute
121+
122+
expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]])
123+
expect(post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]])
124+
125+
expect(private_topic.translations).to eq([])
126+
expect(private_post.translations).to eq([])
127+
end
128+
129+
it "translate only content newer than automatic_translation_backfill_max_age_days" do
130+
old_post = Fabricate(:post)
131+
old_topic = old_post.topic
132+
new_post = Fabricate(:post)
133+
new_topic = new_post.topic
134+
135+
old_topic.set_detected_locale("de")
136+
new_topic.set_detected_locale("de")
137+
old_post.set_detected_locale("es")
138+
new_post.set_detected_locale("es")
139+
140+
old_topic.created_at = 5.days.ago
141+
old_topic.save!
142+
old_post.created_at = 5.days.ago
143+
old_post.save!
144+
new_topic.created_at = 1.days.ago
145+
new_topic.save!
146+
new_post.created_at = 1.days.ago
147+
new_post.save!
148+
149+
expect_google_translate("hola")
150+
expect_google_translate("hallo")
151+
152+
SiteSetting.automatic_translation_backfill_max_age_days = 3
153+
described_class.new.execute
154+
155+
expect(old_post.translations).to eq([])
156+
expect(new_post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]])
157+
end
107158
end
108159

109160
describe "with just one locale ['de']" do

0 commit comments

Comments
 (0)