Skip to content

Commit e974006

Browse files
authored
FIX: Revert translating raw for translator providers as they strip newlines (#250)
In 6c197c5, we changed to translate raw instead of cooked. This was largely due to DiscourseAI. However some tests have shown that the original decision of translating cooked is likely due to translation providers like Google or Azure (and the rest) stripping newlines when translating. However these translator providers keep html as-is, so translating cooked is reasonable. This commit reverts the other providers to use cooked, but keeps DiscourseAI as the only provider using raw. The following is a trivial example, but in some more complex cases, the absence of certain characters (besides newline) removed by the provider when dealing with raw is a bit more jarring.
1 parent 04e9ac5 commit e974006

File tree

9 files changed

+34
-33
lines changed

9 files changed

+34
-33
lines changed

app/services/discourse_translator/base.rb

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def self.translate(translatable, target_locale_sym = I18n.locale)
3333
detected_lang = detect(translatable)
3434

3535
if translatable.locale_matches?(target_locale_sym)
36-
return detected_lang, get_untranslated_cooked(translatable)
36+
return detected_lang, get_untranslated(translatable)
3737
end
3838

3939
translation = translatable.translation_for(target_locale_sym)
@@ -50,9 +50,7 @@ def self.translate(translatable, target_locale_sym = I18n.locale)
5050
end
5151

5252
translated = translate!(translatable, target_locale_sym)
53-
save_translation(translatable, target_locale_sym) do
54-
TranslatedContentNormalizer.normalize(translatable, translated)
55-
end
53+
save_translation(translatable, target_locale_sym) { translated }
5654
[detected_lang, translated]
5755
end
5856

@@ -125,27 +123,18 @@ def self.translate_supported?(detected_lang, target_lang)
125123
private
126124

127125
def self.text_for_detection(translatable)
128-
get_untranslated_raw(translatable).truncate(DETECTION_CHAR_LIMIT, omission: nil)
126+
get_untranslated(translatable, raw: true).truncate(DETECTION_CHAR_LIMIT, omission: nil)
129127
end
130128

131-
def self.text_for_translation(translatable)
129+
def self.text_for_translation(translatable, raw: false)
132130
max_char = SiteSetting.max_characters_per_translation
133-
get_untranslated_raw(translatable).truncate(max_char, omission: nil)
134-
end
135-
136-
def self.get_untranslated_raw(translatable)
137-
case translatable.class.name
138-
when "Post"
139-
translatable.raw
140-
when "Topic"
141-
translatable.title
142-
end
131+
get_untranslated(translatable, raw:).truncate(max_char, omission: nil)
143132
end
144133

145-
def self.get_untranslated_cooked(translatable)
134+
def self.get_untranslated(translatable, raw: false)
146135
case translatable.class.name
147136
when "Post"
148-
translatable.cooked
137+
raw ? translatable.raw : translatable.cooked
149138
when "Topic"
150139
translatable.title
151140
end

app/services/discourse_translator/discourse_ai.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,12 @@ def self.translate!(translatable, target_locale_sym = I18n.locale)
3333
),
3434
)
3535
end
36-
::DiscourseAi::Translator.new(text_for_translation(translatable), target_locale_sym).translate
36+
translated =
37+
::DiscourseAi::Translator.new(
38+
text_for_translation(translatable, raw: true),
39+
target_locale_sym,
40+
).translate
41+
DiscourseTranslator::TranslatedContentNormalizer.normalize(translatable, translated)
3742
end
3843

3944
private

spec/jobs/automatic_translation_backfill_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def expect_google_translate(text)
102102
described_class.new.execute
103103

104104
expect(topic.translations.pluck(:locale, :translation)).to eq([%w[es hola]])
105-
expect(post.translations.pluck(:locale, :translation)).to eq([%w[de <p>hallo</p>]])
105+
expect(post.translations.pluck(:locale, :translation)).to eq([%w[de hallo]])
106106
end
107107
end
108108

@@ -126,7 +126,7 @@ def expect_google_translate(text)
126126

127127
expect(topic.translations.pluck(:locale, :translation)).to eq([%w[de hallo]])
128128
expect(posts.map { |p| p.translations.pluck(:locale, :translation).flatten }).to eq(
129-
[%w[de <p>hallo</p>]] * 4,
129+
[%w[de hallo]] * 4,
130130
)
131131
end
132132
end

spec/services/amazon_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
describe ".detect" do
2020
let(:post) { Fabricate(:post) }
2121
let!(:client) { Aws::Translate::Client.new(stub_responses: true) }
22-
let(:text) { described_class.truncate(post.raw) }
22+
let(:text) { described_class.truncate(post.cooked) }
2323
let(:detected_lang) { "en" }
2424

2525
before do
@@ -40,7 +40,7 @@
4040
expect(post.detected_locale).to eq(detected_lang)
4141
end
4242

43-
it "should fail graciously when the raw translated text is blank" do
43+
it "should fail graciously when the cooked translated text is blank" do
4444
post.raw = ""
4545
expect(described_class.detect(post)).to be_nil
4646
end

spec/services/base_spec.rb

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,18 @@ class EmptyTranslator < DiscourseTranslator::Base
5252
fab!(:post)
5353

5454
it "truncates to max_characters_per_translation" do
55-
post.raw = "a" * (SiteSetting.max_characters_per_translation + 1)
55+
post.cooked = "a" * (SiteSetting.max_characters_per_translation + 1)
5656
expect(DiscourseTranslator::Base.text_for_translation(post).length).to eq(
5757
SiteSetting.max_characters_per_translation,
5858
)
5959
end
60+
61+
it "uses raw if required" do
62+
post.raw = "a" * (SiteSetting.max_characters_per_translation + 1)
63+
expect(DiscourseTranslator::Base.text_for_translation(post, raw: true).length).to eq(
64+
SiteSetting.max_characters_per_translation,
65+
)
66+
end
6067
end
6168

6269
describe ".detect" do
@@ -93,15 +100,15 @@ class EmptyTranslator < DiscourseTranslator::Base
93100
fab!(:post)
94101

95102
it "returns nil when text is blank" do
96-
post.raw = ""
103+
post.cooked = ""
97104
expect(TestTranslator.translate(post)).to be_nil
98105
end
99106

100107
it "returns original text when detected language matches current locale" do
101108
TestTranslator.save_detected_locale(post) { I18n.locale.to_s }
102-
post.update(raw: "hello")
109+
post.cooked = "hello"
103110

104-
expect(TestTranslator.translate(post)).to eq(%w[en <p>hello</p>])
111+
expect(TestTranslator.translate(post)).to eq(%w[en hello])
105112
end
106113

107114
it "returns cached translation if available" do

spec/services/discourse_ai_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
DiscourseAi::Completions::Llm.with_prepared_responses(["some translated text"]) do
4343
locale, translated_text = DiscourseTranslator::DiscourseAi.translate(post)
4444
expect(locale).to eq "de"
45-
expect(translated_text).to eq "some translated text"
45+
expect(translated_text).to eq "<p>some translated text</p>"
4646
end
4747
end
4848
end

spec/services/google_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,10 @@
149149

150150
it "truncates text for translation to max_characters_per_translation setting" do
151151
SiteSetting.max_characters_per_translation = 50
152-
post.raw = "a" * 100
152+
post.cooked = "a" * 100
153153
post.set_detected_locale("de")
154154
body = {
155-
q: post.raw.truncate(SiteSetting.max_characters_per_translation, omission: nil),
155+
q: post.cooked.truncate(SiteSetting.max_characters_per_translation, omission: nil),
156156
source: "de",
157157
target: "en",
158158
key: api_key,

spec/services/libre_translate_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
it "truncates text for translation to max_characters_per_translation setting" do
4545
SiteSetting.max_characters_per_translation = 50
4646
post.set_detected_locale("de")
47-
body = { q: post.raw, source: "de", target: "en", format: "html", api_key: api_key }
47+
body = { q: post.cooked, source: "de", target: "en", format: "html", api_key: api_key }
4848

4949
translated_text = "hur dur hur dur"
5050
# https://publicapi.dev/libre-translate-api

spec/services/microsoft_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ def translate_endpoint
176176
it "raises an error if the post is too long to be translated" do
177177
I18n.locale = "ja"
178178
SiteSetting.max_characters_per_translation = 100_000
179-
post.update_columns(raw: "*" * (DiscourseTranslator::Microsoft::LENGTH_LIMIT + 1))
179+
post.update_columns(cooked: "*" * (DiscourseTranslator::Microsoft::LENGTH_LIMIT + 1))
180180

181181
expect { described_class.translate(post) }.to raise_error(
182182
DiscourseTranslator::TranslatorError,
@@ -190,7 +190,7 @@ def translate_endpoint
190190
:post,
191191
"https://api.cognitive.microsofttranslator.com/translate?api-version=3.0&from=en&textType=html&to=ja",
192192
).with(
193-
body: "[{\"Text\":\"Hello world\"}]",
193+
body: "[{\"Text\":\"\\u003cp\\u003eHello world\\u003c/p\\u003e\"}]",
194194
headers: {
195195
"Ocp-Apim-Subscription-Key" => SiteSetting.translator_azure_subscription_key,
196196
"Content-Type" => "application/json",

0 commit comments

Comments
 (0)