Skip to content

Commit 39fc55f

Browse files
authored
FEATURE: Detect locale and translate posts from core table (#287)
This PR adds two jobs: one fills in `post.locale`, and the other adds a `PostLocalization` for each locale defined in `SiteSetting.automatic_translation_target_languages`. It also creates the dedicated `DiscourseTranslator::PostTranslator` and `DiscourseTranslator::PostLocaleDetector` classes to serve this purpose. We should be moving away from dealing with `translatables` in the near future.
1 parent 14ca3c0 commit 39fc55f

File tree

11 files changed

+453
-21
lines changed

11 files changed

+453
-21
lines changed

.discourse-compatibility

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
< 3.5.0.beta4-dev: 14ca3c07efa0a80712a4cbb8ca455c32a727adec
12
< 3.5.0.beta2-dev: 5f24835801fdc7cb98e1bcf42d2ab2e49e609921
23
< 3.5.0.beta1-dev: 7d411e458bdd449f8aead2bc07cedeb00b856798
34
< 3.4.0.beta3-dev: b4cf3a065884816fa3f770248c2bf908ba65d8ac
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# frozen_string_literal: true
2+
3+
module Jobs
  # Background job that fills in `locale` for posts that do not have one yet.
  #
  # Each run processes at most BATCH_SIZE posts, most recently updated first.
  # Sidekiq retries are disabled, and a failure on one post is logged without
  # aborting the rest of the batch.
  class DetectPostsLocale < ::Jobs::Base
    cluster_concurrency 1
    sidekiq_options retry: false

    BATCH_SIZE = 50

    # Runs one detection batch. Does nothing unless both the
    # `translator_enabled` and `experimental_content_translation` site
    # settings are on.
    #
    # @param args [Hash] job arguments (not used by this job)
    # @return [void]
    def execute(args)
      return unless SiteSetting.translator_enabled
      return unless SiteSetting.experimental_content_translation

      # Load the batch once; calling `empty?` and then `each` on the bare
      # relation would hit the database twice.
      posts =
        Post
          .where(locale: nil)
          .where(deleted_at: nil)
          .where("posts.user_id > 0") # the spec relies on this to skip system user posts
          .where.not(raw: [nil, ""])
          .order(updated_at: :desc)
          .limit(BATCH_SIZE)
          .to_a
      return if posts.empty?

      # Only count posts whose detection did not raise, so the summary does
      # not report failures as detections.
      detected = posts.count { |post| detect_locale_for(post) }

      DiscourseTranslator::VerboseLogger.log("Detected #{detected} post locales")
    end

    private

    # Detects the locale of one post, logging (not raising) on failure.
    #
    # @return [Boolean] true when detection completed without an error
    def detect_locale_for(post)
      DiscourseTranslator::PostLocaleDetector.detect_locale(post)
      true
    rescue => e
      Rails.logger.error(
        "Discourse Translator: Failed to detect post #{post.id}'s locale: #{e.message}",
      )
      false
    end
  end
end
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# frozen_string_literal: true
2+
3+
module Jobs
  # Background job that creates a PostLocalization for every locale listed in
  # `SiteSetting.automatic_translation_target_languages`.
  #
  # For each target locale, a run translates at most BATCH_SIZE posts that
  # already have a detected locale different from the target and have no
  # localization row for that target yet. Sidekiq retries are disabled, and a
  # failure on one post is logged without aborting the rest of the batch.
  class TranslatePosts < ::Jobs::Base
    cluster_concurrency 1
    sidekiq_options retry: false

    BATCH_SIZE = 50

    # Runs one translation batch per target locale. Does nothing unless both
    # the `translator_enabled` and `experimental_content_translation` site
    # settings are on.
    #
    # @param args [Hash] job arguments (not used by this job)
    # @return [void]
    def execute(args)
      return unless SiteSetting.translator_enabled
      return unless SiteSetting.experimental_content_translation

      # Drop empty entries (e.g. from "en||fr") so they are not treated as locales.
      locales = SiteSetting.automatic_translation_target_languages.split("|").reject(&:blank?)
      return if locales.blank?

      locales.each do |locale|
        posts = posts_missing_localization(locale)
        next if posts.empty?

        # Only count posts whose translation did not raise.
        translated = posts.count { |post| translate_post(post, locale) }

        DiscourseTranslator::VerboseLogger.log("Translated #{translated} posts to #{locale}")
      end
    end

    private

    # Loads one batch of posts that still need a localization for `locale`.
    # Ordered most recently updated first so the LIMIT picks a deterministic
    # batch, matching the ordering used by DetectPostsLocale.
    #
    # @param locale [String] target locale taken from the site setting
    # @return [Array<Post>]
    def posts_missing_localization(locale)
      Post
        .joins(
          "LEFT JOIN post_localizations pl ON pl.post_id = posts.id AND pl.locale = #{ActiveRecord::Base.connection.quote(locale)}",
        )
        .where(deleted_at: nil)
        .where("posts.user_id > 0")
        .where.not(raw: [nil, ""])
        .where.not(locale: nil)
        .where.not(locale: locale)
        .where("pl.id IS NULL")
        .order(updated_at: :desc)
        .limit(BATCH_SIZE)
        .to_a
    end

    # Translates one post, logging (not raising) on failure.
    #
    # @return [Boolean] true when the translation call completed without an error
    def translate_post(post, locale)
      DiscourseTranslator::PostTranslator.translate(post, locale)
      true
    rescue => e
      Rails.logger.error(
        "Discourse Translator: Failed to translate post #{post.id} to #{locale}: #{e.message}",
      )
      false
    end
  end
end
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseTranslator
  # Detects the locale of a post through the configured translator provider
  # and stores it on the post.
  class PostLocaleDetector
    # @param post the post whose locale should be detected; a blank value is ignored
    # @return the detected locale, or nil when `post` is blank or the provider
    #   returned no usable locale
    # @raise whatever the provider's `detect!` or `post.update!` raises
    def self.detect_locale(post)
      return if post.blank?

      translator = DiscourseTranslator::Provider::TranslatorProvider.get
      detected_locale = translator.detect!(post)

      # Do not persist an empty result: a post with locale "" would drop out of
      # the `locale: nil` detection queue while still looking "detected".
      return if detected_locale.blank?

      post.update!(locale: detected_locale)
      detected_locale
    end
  end
end
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseTranslator
  # Translates a post through the configured translator provider and stores
  # the result as a PostLocalization for the target locale.
  class PostTranslator
    # @param post the post to translate; a blank value is ignored
    # @param target_locale [String, Symbol] locale to translate into; the first
    #   "-" is converted to "_" before use (defaults to I18n.locale)
    # @return the saved PostLocalization, or nil when `post` or `target_locale`
    #   is blank or the post is already written in the target locale
    # @raise whatever the provider's `translate_post!` or `save!` raises
    def self.translate(post, target_locale = I18n.locale)
      return if post.blank? || target_locale.blank?

      target_locale_sym = normalize_locale(target_locale).to_sym

      # Compare normalised forms on both sides so "pt-BR" and "pt_BR" count as
      # the same locale and the post is not translated into its own language.
      return if normalize_locale(post.locale) == target_locale_sym.to_s

      translator = DiscourseTranslator::Provider::TranslatorProvider.get
      translated_raw = translator.translate_post!(post, target_locale_sym)

      localization =
        PostLocalization.find_or_initialize_by(post_id: post.id, locale: target_locale_sym.to_s)

      localization.raw = translated_raw
      localization.cooked = PrettyText.cook(translated_raw)
      localization.post_version = post.version
      localization.localizer_user_id = Discourse.system_user.id
      localization.save!
      localization
    end

    # Converts a locale to its underscore form, e.g. "pt-BR" -> "pt_BR".
    #
    # @param locale [String, Symbol, nil]
    # @return [String]
    def self.normalize_locale(locale)
      locale.to_s.sub("-", "_")
    end
    private_class_method :normalize_locale
  end
end

app/services/discourse_translator/provider/base_provider.rb

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,7 @@ def self.translate(translatable, target_locale_sym = I18n.locale)
5555
[detected_lang, translated]
5656
end
5757

58-
# Subclasses must implement this method to translate the text of a
59-
# post or topic and return only the translated text.
60-
# Subclasses should use text_for_translation
61-
# @param translatable [Post|Topic]
62-
# @param target_locale_sym [Symbol]
63-
# @return [String]
58+
# TODO: Deprecate this in favour of translate_<model>
6459
def self.translate_translatable!(translatable, target_locale_sym = I18n.locale)
6560
raise "Not Implemented"
6661
end
@@ -69,6 +64,14 @@ def self.translate_text!(text, target_locale_sym = I18n.locale)
6964
raise "Not Implemented"
7065
end
7166

67+
# Translates a single post into the target locale.
# This default implementation only forwards to translate_translatable!;
# a provider may define its own translate_post! instead.
# @param post the post to translate (presumably a Post record — confirm against callers)
# @param target_locale_sym [Symbol] locale to translate into, defaults to I18n.locale
# @return whatever translate_translatable! returns for the post
def self.translate_post!(post, target_locale_sym = I18n.locale)
68+
translate_translatable!(post, target_locale_sym)
69+
end
70+
71+
# Translates a single topic into the target locale.
# This default implementation only forwards to translate_translatable!;
# a provider may define its own translate_topic! instead.
# @param topic the topic to translate (presumably a Topic record — confirm against callers)
# @param target_locale_sym [Symbol] locale to translate into, defaults to I18n.locale
# @return whatever translate_translatable! returns for the topic
def self.translate_topic!(topic, target_locale_sym = I18n.locale)
72+
translate_translatable!(topic, target_locale_sym)
73+
end
74+
7275
# Returns the stored detected locale of a post or topic.
7376
# If the locale does not exist yet, it will be detected first via the API then stored.
7477
# @param translatable [Post|Topic]

app/services/discourse_translator/provider/discourse_ai.rb

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,32 @@ def self.detect!(topic_or_post)
1616
end
1717

1818
# Routes a translatable to the model-specific translator, chosen by its
# class name. Returns nil when it is neither a "Post" nor a "Topic".
def self.translate_translatable!(translatable, target_locale_sym = I18n.locale)
  case translatable.class.name
  when "Post"
    translate_post!(translatable, target_locale_sym)
  when "Topic"
    translate_topic!(translatable, target_locale_sym)
  end
end
25+
26+
def self.translate_post!(post, target_locale_sym = I18n.locale)
1927
validate_required_settings!
2028

21-
language = get_language_name(target_locale_sym)
29+
text = text_for_translation(post, raw: true)
30+
chunks = DiscourseTranslator::ContentSplitter.split(text)
2231
translated =
23-
case translatable.class.name
24-
when "Post"
25-
text = text_for_translation(translatable, raw: true)
26-
chunks = DiscourseTranslator::ContentSplitter.split(text)
27-
chunks
28-
.map { |chunk| ::DiscourseAi::PostTranslator.new(chunk, target_locale_sym).translate }
29-
.join("")
30-
when "Topic"
31-
::DiscourseAi::TopicTranslator.new(
32-
text_for_translation(translatable),
33-
language,
34-
).translate
35-
end
32+
chunks
33+
.map { |chunk| ::DiscourseAi::PostTranslator.new(chunk, target_locale_sym).translate }
34+
.join("")
35+
DiscourseTranslator::TranslatedContentNormalizer.normalize(post, translated)
36+
end
3637

37-
DiscourseTranslator::TranslatedContentNormalizer.normalize(translatable, translated)
38+
def self.translate_topic!(topic, target_locale_sym = I18n.locale)
39+
validate_required_settings!
40+
41+
language = get_language_name(target_locale_sym)
42+
translated =
43+
::DiscourseAi::TopicTranslator.new(text_for_translation(topic), language).translate
44+
DiscourseTranslator::TranslatedContentNormalizer.normalize(topic, translated)
3845
end
3946

4047
def self.translate_text!(text, target_locale_sym = I18n.locale)
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# frozen_string_literal: true
2+
3+
describe Jobs::DetectPostsLocale do
  fab!(:post) { Fabricate(:post, locale: nil) }
  subject(:job) { described_class.new }

  # Both settings must be on for the job to do any work; individual examples
  # switch one of them off to exercise the guards.
  before do
    SiteSetting.translator_enabled = true
    SiteSetting.experimental_content_translation = true
  end

  it "does nothing when translator is disabled" do
    SiteSetting.translator_enabled = false
    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).never

    job.execute({})
  end

  it "does nothing when content translation is disabled" do
    SiteSetting.experimental_content_translation = false
    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).never

    job.execute({})
  end

  it "does nothing when there are no posts to detect" do
    Post.update_all(locale: "en")
    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).never

    job.execute({})
  end

  it "detects locale for posts with nil locale" do
    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).with(post).once
    job.execute({})
  end

  it "detects most recently updated posts first" do
    post_2 = Fabricate(:post, locale: nil)
    post_3 = Fabricate(:post, locale: nil)

    post.update!(updated_at: 3.days.ago)
    post_2.update!(updated_at: 2.days.ago)
    post_3.update!(updated_at: 4.days.ago)

    # Capture the real value before entering the protected region so the
    # ensure clause can never restore the constant to nil if setup fails.
    original_batch = described_class::BATCH_SIZE
    begin
      described_class.const_set(:BATCH_SIZE, 1)

      DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).with(post_2).once
      DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).with(post).never
      DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).with(post_3).never

      job.execute({})
    ensure
      described_class.const_set(:BATCH_SIZE, original_batch)
    end
  end

  it "skips bot posts" do
    post.update!(user: Discourse.system_user)
    DiscourseTranslator::PostLocaleDetector.expects(:detect_locale).with(post).never

    job.execute({})
  end

  it "handles detection errors gracefully" do
    DiscourseTranslator::PostLocaleDetector
      .expects(:detect_locale)
      .with(post)
      .raises(StandardError.new("jiboomz"))
      .once

    expect { job.execute({}) }.not_to raise_error
  end

  it "logs a summary after running" do
    DiscourseTranslator::PostLocaleDetector.stubs(:detect_locale)
    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Detected 1 post locales"))

    job.execute({})
  end
end

0 commit comments

Comments
 (0)