Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit c92cff1

Browse files
committed
DEV: Also detect locale of categories and do not translate if already in that locale
1 parent 3e74eea commit c92cff1

File tree

6 files changed

+261
-18
lines changed

6 files changed

+261
-18
lines changed

app/jobs/regular/localize_categories.rb

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ def execute(args)
1717
cat_id = args[:from_category_id] || Category.order(:id).first&.id
1818
last_id = nil
1919

20-
categories = Category.where("id >= ?", cat_id).order(:id).limit(BATCH_SIZE)
20+
categories =
21+
Category.where("id >= ? AND locale IS NOT NULL", cat_id).order(:id).limit(BATCH_SIZE)
2122
return if categories.empty?
2223

2324
categories.each do |category|
@@ -26,9 +27,13 @@ def execute(args)
2627
next
2728
end
2829

29-
CategoryLocalization.transaction do
30-
locales.each do |locale|
31-
next if CategoryLocalization.exists?(category_id: category.id, locale: locale)
30+
locales.each do |locale|
31+
localization = category.category_localizations.find_by(locale:)
32+
33+
if locale == category.locale && localization
34+
localization.destroy
35+
else
36+
next if locale == category.locale
3237
begin
3338
DiscourseAi::Translation::CategoryLocalizer.localize(category, locale)
3439
rescue FinalDestination::SSRFDetector::LookupFailedError
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# frozen_string_literal: true
2+
3+
module Jobs
  # Scheduled job (declared to run every hour, without Sidekiq retries and with
  # a cluster concurrency of 1) that runs locale detection on categories whose
  # `locale` column is still NULL.
  class CategoriesLocaleDetectionBackfill < ::Jobs::Scheduled
    every 1.hour
    sidekiq_options retry: false
    cluster_concurrency 1

    # Runs one backfill pass.
    #
    # Does nothing when the plugin or AI translation is disabled, or when
    # `ai_translation_backfill_rate` is 0. Otherwise selects at most
    # `ai_translation_backfill_rate` categories with a NULL locale (only
    # non-read-restricted ones when
    # `ai_translation_backfill_limit_to_public_content` is set) and runs
    # locale detection on each of them.
    #
    # @param args [Hash] job arguments; not read by this job
    # @return [void]
    def execute(args)
      return if !SiteSetting.discourse_ai_enabled
      return if !SiteSetting.ai_translation_enabled
      return if SiteSetting.ai_translation_backfill_rate == 0

      categories = Category.where(locale: nil)

      if SiteSetting.ai_translation_backfill_limit_to_public_content
        categories = categories.where(read_restricted: false)
      end

      categories = categories.limit(SiteSetting.ai_translation_backfill_rate)
      return if categories.empty?

      # Count only the detections that completed, so the summary below does not
      # report categories whose detection raised as "detected".
      detected = 0
      categories.each { |category| detected += 1 if detect_locale_for(category) }

      DiscourseAi::Translation::VerboseLogger.log("Detected #{detected} category locales")
    end

    private

    # Runs locale detection for a single category without letting a failure
    # abort the rest of the batch.
    #
    # @param category [Category]
    # @return [Boolean] true when detection finished without raising
    def detect_locale_for(category)
      DiscourseAi::Translation::CategoryLocaleDetector.detect_locale(category)
      true
    rescue FinalDestination::SSRFDetector::LookupFailedError
      # Swallowed without logging, as in the original implementation.
      # NOTE(review): presumably these lookup failures are sporadic and not
      # worth logging - confirm.
      false
    rescue => e
      DiscourseAi::Translation::VerboseLogger.log(
        "Failed to detect category #{category.id}'s locale: #{e.message}",
      )
      false
    end
  end
end
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
  module Translation
    # Detects the language a category is written in and stores it on the
    # category's `locale` column.
    class CategoryLocaleDetector
      # Detects the locale of +category+ from its name and description and
      # persists it with `update_column`.
      #
      # @param category [Category, nil] the category to inspect
      # @return [String, nil] the normalized locale that was stored, or nil
      #   when the category is blank, has no text to analyse, or no language
      #   was detected (in which case nothing is written)
      def self.detect_locale(category)
        return if category.blank?

        # Drop blank parts (not just nil) so an empty description does not
        # leave a dangling "\n\n" separator in the text sent for detection.
        text = [category.name, category.description].reject(&:blank?).join("\n\n")
        return if text.blank?

        detected_locale = LanguageDetector.new(text).detect
        # Do not normalize or persist an empty detection result.
        return if detected_locale.blank?

        locale = LocaleNormalizer.normalize_to_i18n(detected_locale)
        category.update_column(:locale, locale)
        locale
      end
    end
  end
end

spec/jobs/regular/localize_categories_spec.rb

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,9 @@ def localize_all_categories(*locales)
5353
end
5454

5555
it "translates categories to the configured locales" do
56+
Category.update_all(locale: "en")
5657
number_of_categories = Category.count
58+
5759
DiscourseAi::Translation::CategoryLocalizer
5860
.expects(:localize)
5961
.with(is_a(Category), "pt")
@@ -69,20 +71,19 @@ def localize_all_categories(*locales)
6971
it "skips categories that already have localizations" do
7072
localize_all_categories("pt", "zh_CN")
7173

72-
category1 =
73-
Fabricate(:category, name: "First Category", description: "First category description")
74-
Fabricate(:category_localization, category: category1, locale: "pt", name: "Primeira Categoria")
75-
76-
# It should only translate to Chinese, not Portuguese
77-
DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(category1, "pt").never
78-
DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(category1, "zh_CN").once
74+
DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(is_a(Category), "pt").never
75+
DiscourseAi::Translation::CategoryLocalizer
76+
.expects(:localize)
77+
.with(is_a(Category), "zh_CN")
78+
.never
7979

8080
job.execute({})
8181
end
8282

8383
it "continues from a specified category ID" do
84-
category1 = Fabricate(:category, name: "First", description: "First description")
85-
category2 = Fabricate(:category, name: "Second", description: "Second description")
84+
category1 = Fabricate(:category, name: "First", description: "First description", locale: "en")
85+
category2 =
86+
Fabricate(:category, name: "Second", description: "Second description", locale: "en")
8687

8788
DiscourseAi::Translation::CategoryLocalizer
8889
.expects(:localize)
@@ -99,7 +100,7 @@ def localize_all_categories(*locales)
99100
it "handles translation errors gracefully" do
100101
localize_all_categories("pt", "zh_CN")
101102

102-
category1 = Fabricate(:category, name: "First", description: "First description")
103+
category1 = Fabricate(:category, name: "First", description: "First description", locale: "en")
103104
DiscourseAi::Translation::CategoryLocalizer
104105
.expects(:localize)
105106
.with(category1, "pt")
@@ -110,6 +111,8 @@ def localize_all_categories(*locales)
110111
end
111112

112113
it "enqueues the next batch when there are more categories" do
114+
Category.update_all(locale: "en")
115+
113116
Jobs.run_later!
114117
freeze_time
115118
Jobs::LocalizeCategories.const_set(:BATCH_SIZE, 1)
@@ -134,10 +137,8 @@ def localize_all_categories(*locales)
134137
it "skips read-restricted categories when configured" do
135138
SiteSetting.ai_translation_backfill_limit_to_public_content = true
136139

137-
category1 = Fabricate(:category, name: "Public Category", read_restricted: false)
138-
category2 = Fabricate(:category, name: "Private Category", read_restricted: true)
139-
140-
DiscourseAi::Translation::CategoryLocalizer.expects(:localize).at_least_once
140+
category1 = Fabricate(:category, name: "Public Category", read_restricted: false, locale: "en")
141+
category2 = Fabricate(:category, name: "Private Category", read_restricted: true, locale: "en")
141142

142143
DiscourseAi::Translation::CategoryLocalizer
143144
.expects(:localize)
@@ -150,4 +151,40 @@ def localize_all_categories(*locales)
150151

151152
job.execute({})
152153
end
154+
155+
it "skips creating localizations in the same language as the category's locale" do
156+
Category.update_all(locale: "pt")
157+
158+
DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(is_a(Category), "pt").never
159+
DiscourseAi::Translation::CategoryLocalizer
160+
.expects(:localize)
161+
.with(is_a(Category), "zh_CN")
162+
.times(Category.count)
163+
164+
job.execute({})
165+
end
166+
167+
it "deletes existing localizations that match the category's locale" do
168+
# update all categories to portuguese
169+
Category.update_all(locale: "pt")
170+
171+
localize_all_categories("pt", "zh_CN")
172+
173+
expect { job.execute({}) }.to change { CategoryLocalization.exists?(locale: "pt") }.from(
174+
true,
175+
).to(false)
176+
end
177+
178+
it "doesn't process categories with nil locale" do
179+
# Add a category with nil locale
180+
nil_locale_category = Fabricate(:category, name: "No Locale", locale: nil)
181+
182+
# Make sure our query for categories with non-null locales excludes it
183+
DiscourseAi::Translation::CategoryLocalizer
184+
.expects(:localize)
185+
.with(nil_locale_category, any_parameters)
186+
.never
187+
188+
job.execute({})
189+
end
153190
end
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# frozen_string_literal: true
2+
3+
describe Jobs::CategoriesLocaleDetectionBackfill do
  fab!(:category) { Fabricate(:category, locale: nil) }
  subject(:job) { described_class.new }

  # Shorthand for the class whose `detect_locale` calls are asserted below.
  let(:detector) { DiscourseAi::Translation::CategoryLocaleDetector }

  before do
    SiteSetting.discourse_ai_enabled = true
    fake_llm = Fabricate(:fake_model)
    SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}")
    SiteSetting.ai_translation_enabled = true
    SiteSetting.ai_translation_backfill_rate = 100
  end

  it "does nothing when AI is disabled" do
    SiteSetting.discourse_ai_enabled = false
    detector.expects(:detect_locale).never

    job.execute({})
  end

  it "does nothing when content translation is disabled" do
    SiteSetting.ai_translation_enabled = false
    detector.expects(:detect_locale).never

    job.execute({})
  end

  it "does nothing when backfill rate is 0" do
    SiteSetting.ai_translation_backfill_rate = 0
    detector.expects(:detect_locale).never

    job.execute({})
  end

  it "does nothing when there are no categories to detect" do
    Category.update_all(locale: "en")
    detector.expects(:detect_locale).never

    job.execute({})
  end

  it "detects locale for categories with nil locale" do
    detector.expects(:detect_locale).with(is_a(Category)).times(Category.count)

    job.execute({})
  end

  it "handles detection errors gracefully" do
    # Catch-all first, then the specific failing call for the fabricated category.
    detector.expects(:detect_locale).with(is_a(Category)).at_least_once
    detector
      .expects(:detect_locale)
      .with(category)
      .raises(StandardError.new("error"))
      .once

    expect { job.execute({}) }.not_to raise_error
  end

  it "logs a summary after running" do
    detector.stubs(:detect_locale)
    DiscourseAi::Translation::VerboseLogger
      .expects(:log)
      .with(includes("Detected #{Category.count} category locales"))

    job.execute({})
  end

  describe "with public content limitation" do
    fab!(:private_category) { Fabricate(:private_category, group: Group[:staff], locale: nil) }

    before do
      # catch-all for other categories
      detector.expects(:detect_locale).with(is_a(Category)).at_least_once

      SiteSetting.ai_translation_backfill_limit_to_public_content = true
    end

    it "only processes public categories" do
      detector.expects(:detect_locale).with(category).once
      detector.expects(:detect_locale).with(private_category).never

      job.execute({})
    end

    it "processes all categories when setting is disabled" do
      SiteSetting.ai_translation_backfill_limit_to_public_content = false

      detector.expects(:detect_locale).with(category).once
      detector.expects(:detect_locale).with(private_category).once

      job.execute({})
    end
  end

  it "limits processing to the backfill rate" do
    SiteSetting.ai_translation_backfill_rate = 1
    Fabricate(:category, locale: nil)

    detector.expects(:detect_locale).once

    job.execute({})
  end
end
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# frozen_string_literal: true
2+
3+
describe DiscourseAi::Translation::CategoryLocaleDetector do
  describe ".detect_locale" do
    fab!(:category) do
      Fabricate(
        :category,
        name: "Hello world",
        description: "Welcome to this category",
        locale: nil,
      )
    end

    # Replaces LanguageDetector with a double that expects to be built with
    # opts[:text] and reports opts[:locale] from #detect.
    def language_detector_stub(opts)
      mock = instance_double(DiscourseAi::Translation::LanguageDetector)
      allow(DiscourseAi::Translation::LanguageDetector).to receive(:new).with(
        opts[:text],
      ).and_return(mock)
      allow(mock).to receive(:detect).and_return(opts[:locale])
    end

    it "returns nil if category is blank" do
      expect(described_class.detect_locale(nil)).to eq(nil)
    end

    it "updates the category locale with the detected locale" do
      text = "#{category.name}\n\n#{category.description}"
      language_detector_stub({ text: text, locale: "zh_CN" })

      expect { described_class.detect_locale(category) }.to change { category.reload.locale }.from(
        nil,
      ).to("zh_CN")
    end

    it "handles category with no description" do
      no_description_category =
        Fabricate(:category, name: "Test Category", description: nil, locale: nil)
      language_detector_stub({ text: no_description_category.name, locale: "fr" })

      expect { described_class.detect_locale(no_description_category) }.to change {
        no_description_category.reload.locale
      }.from(nil).to("fr")
    end

    it "bypasses validations when updating locale" do
      language_detector_stub(
        { text: "#{category.name}\n\n#{category.description}", locale: "zh_CN" },
      )
      # Make the record report itself as invalid: a write that runs validations
      # would then be rejected, so the locale only persists if they are skipped.
      allow(category).to receive(:valid?).and_return(false)

      described_class.detect_locale(category)
      expect(category.reload.locale).to eq("zh_CN")
    end
  end
end

0 commit comments

Comments
 (0)