Skip to content

Commit 644a165

Browse files
authored
DEV: Hyphenate locales (#206)
Currently (before the custom fields to tables migrations), locales are sometimes saved as "pt-PT" and "pt_BR" due to the API returning the former and us saving the latter through I18n.locale. e.g. we are seeing the following in the custom fields, which would mean that the table migrations (#201) also have inherited the discrepancies. ``` #<PostCustomField:0x00007faffb49f798 id: 12321231, post_id: 1231241, name: "translated_text", value: "{\"en_GB\":\"\\u003cp\\u003eGreat post my friend \\u00...", # < locale is underscored ...> # and #<PostCustomField:0x00007faffb49dfd8 id: 12313123, post_id: 123123, name: "post_detected_lang", value: "pt-PT", # < locale is hyphenated ...> ``` This commit adds a migration to convert all values to the hyphenated version, ensures we save the hyphenated ones to the db, and introduces a `locale_matches?` on the translatable models.
1 parent 3537345 commit 644a165

File tree

12 files changed

+195
-55
lines changed

12 files changed

+195
-55
lines changed

app/models/concerns/discourse_translator/translatable.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,25 @@ def set_translation(locale, text)
2323
end
2424

2525
def translation_for(locale)
26+
locale = locale.to_s.gsub("_", "-")
2627
translations.find_by(locale: locale)&.translation
2728
end
2829

2930
def detected_locale
3031
content_locale&.detected_locale
3132
end
3233

34+
def locale_matches?(locale, normalise_region: true)
35+
return false if detected_locale.blank? || locale.blank?
36+
37+
# locales can be :en :en_US "en" "en-US"
38+
detected = detected_locale.gsub("_", "-")
39+
target = locale.to_s.gsub("_", "-")
40+
detected = detected.split("-").first if normalise_region
41+
target = target.split("-").first if normalise_region
42+
detected == target
43+
end
44+
3345
private
3446

3547
def clear_translations

app/services/discourse_translator/amazon.rb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,23 +123,23 @@ def self.detect!(topic_or_post)
123123
end
124124
end
125125

126-
def self.translate!(topic_or_post)
127-
detected_lang = detect(topic_or_post)
126+
def self.translate!(translatable, target_locale_sym = I18n.locale)
127+
detected_lang = detect(translatable)
128128

129-
save_translation(topic_or_post) do
129+
save_translation(translatable) do
130130
begin
131131
client.translate_text(
132132
{
133-
text: truncate(text_for_translation(topic_or_post)),
133+
text: truncate(text_for_translation(translatable)),
134134
source_language_code: "auto",
135-
target_language_code: SUPPORTED_LANG_MAPPING[I18n.locale],
135+
target_language_code: SUPPORTED_LANG_MAPPING[target_locale_sym],
136136
},
137137
)
138138
rescue Aws::Translate::Errors::UnsupportedLanguagePairException
139139
raise I18n.t(
140140
"translator.failed",
141141
source_locale: detected_lang,
142-
target_locale: I18n.locale,
142+
target_locale: target_locale_sym,
143143
)
144144
end
145145
end

app/services/discourse_translator/base.rb

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,23 @@ def self.cache_key
2828
# If the translation does not exist yet, it will be translated first via the API then stored.
2929
# If the detected language is the same as the target language, the original text will be returned.
3030
# @param translatable [Post|Topic]
31-
def self.translate(translatable)
31+
def self.translate(translatable, target_locale_sym = I18n.locale)
3232
return if text_for_translation(translatable).blank?
3333
detected_lang = detect(translatable)
3434

35-
return detected_lang, get_text(translatable) if (detected_lang&.to_s == I18n.locale.to_s)
35+
if translatable.locale_matches?(target_locale_sym)
36+
return detected_lang, get_text(translatable)
37+
end
3638

37-
existing_translation = get_translation(translatable)
38-
return detected_lang, existing_translation if existing_translation.present?
39+
translation = translatable.translation_for(target_locale_sym)
40+
return detected_lang, translation if translation.present?
3941

40-
unless translate_supported?(detected_lang, I18n.locale)
42+
unless translate_supported?(detected_lang, target_locale_sym)
4143
raise TranslatorError.new(
4244
I18n.t(
4345
"translator.failed",
4446
source_locale: detected_lang,
45-
target_locale: I18n.locale,
47+
target_locale: target_locale_sym,
4648
),
4749
)
4850
end
@@ -52,7 +54,7 @@ def self.translate(translatable)
5254
# Subclasses must implement this method to translate the text of a post or topic
5355
# then use the save_translation method to store the translated text.
5456
# @param translatable [Post|Topic]
55-
def self.translate!(translatable)
57+
def self.translate!(translatable, target_locale_sym = I18n.locale)
5658
raise "Not Implemented"
5759
end
5860

@@ -75,10 +77,6 @@ def self.access_token
7577
raise "Not Implemented"
7678
end
7779

78-
def self.get_translation(translatable)
79-
translatable.translation_for(I18n.locale)
80-
end
81-
8280
def self.save_translation(translatable)
8381
translation = yield
8482
translatable.set_translation(I18n.locale, translation)

app/services/discourse_translator/discourse_ai.rb

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,14 @@ def self.detect!(topic_or_post)
1919
end
2020
end
2121

22-
def self.translate(topic_or_post)
22+
def self.translate!(translatable, target_locale_sym = I18n.locale)
2323
return unless required_settings_enabled
24-
25-
detected_lang = detect(topic_or_post)
26-
translated_text =
27-
save_translation(topic_or_post) do
28-
::DiscourseAi::Translator.new(text_for_translation(topic_or_post), I18n.locale).translate
29-
end
30-
31-
[detected_lang, translated_text]
24+
save_translation(translatable) do
25+
::DiscourseAi::Translator.new(
26+
text_for_translation(translatable),
27+
target_locale_sym,
28+
).translate
29+
end
3230
end
3331

3432
private

app/services/discourse_translator/google.rb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,15 +89,15 @@ def self.translate_supported?(source, target)
8989
res["languages"].any? { |obj| obj["language"] == source }
9090
end
9191

92-
def self.translate!(topic_or_post)
93-
detected_locale = detect(topic_or_post)
94-
save_translation(topic_or_post) do
92+
def self.translate!(translatable, target_locale_sym = I18n.locale)
93+
detected_locale = detect(translatable)
94+
save_translation(translatable) do
9595
res =
9696
result(
9797
TRANSLATE_URI,
98-
q: text_for_translation(topic_or_post),
98+
q: text_for_translation(translatable),
9999
source: detected_locale,
100-
target: SUPPORTED_LANG_MAPPING[I18n.locale],
100+
target: SUPPORTED_LANG_MAPPING[target_locale_sym],
101101
)
102102
res["translations"][0]["translatedText"]
103103
end

app/services/discourse_translator/libre_translate.rb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,16 +95,16 @@ def self.translate_supported?(source, target)
9595
res.any? { |obj| obj["code"] == source } && res.any? { |obj| obj["code"] == lang }
9696
end
9797

98-
def self.translate!(topic_or_post)
99-
detected_lang = detect(topic_or_post)
98+
def self.translate!(translatable, target_locale_sym = I18n.locale)
99+
detected_lang = detect(translatable)
100100

101-
save_translation(topic_or_post) do
101+
save_translation(translatable) do
102102
res =
103103
result(
104104
translate_uri,
105-
q: text_for_translation(topic_or_post),
105+
q: text_for_translation(translatable),
106106
source: detected_lang,
107-
target: SUPPORTED_LANG_MAPPING[I18n.locale],
107+
target: SUPPORTED_LANG_MAPPING[target_locale],
108108
format: "html",
109109
)
110110
res["translatedText"]

app/services/discourse_translator/microsoft.rb

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -157,17 +157,19 @@ def self.detect!(topic_or_post)
157157
end
158158
end
159159

160-
def self.translate!(topic_or_post)
161-
detected_lang = detect(topic_or_post)
160+
def self.translate!(translatable, target_locale_sym = I18n.locale)
161+
detected_lang = detect(translatable)
162162

163-
if text_for_translation(topic_or_post).length > LENGTH_LIMIT
163+
if text_for_translation(translatable).length > LENGTH_LIMIT
164164
raise TranslatorError.new(I18n.t("translator.too_long"))
165165
end
166+
locale =
167+
SUPPORTED_LANG_MAPPING[target_locale_sym] || (raise I18n.t("translator.not_supported"))
166168

167-
save_translation(topic_or_post) do
169+
save_translation(translatable) do
168170
query = default_query.merge("from" => detected_lang, "to" => locale, "textType" => "html")
169171

170-
body = [{ "Text" => text_for_translation(topic_or_post) }].to_json
172+
body = [{ "Text" => text_for_translation(translatable) }].to_json
171173

172174
uri = URI(translate_endpoint)
173175
uri.query = URI.encode_www_form(query)
@@ -208,10 +210,6 @@ def self.custom_endpoint?
208210
SiteSetting.translator_azure_custom_subdomain.present?
209211
end
210212

211-
def self.locale
212-
SUPPORTED_LANG_MAPPING[I18n.locale] || (raise I18n.t("translator.not_supported"))
213-
end
214-
215213
def self.post(uri, body, headers = {})
216214
connection = Faraday.new { |f| f.adapter FinalDestination::FaradayAdapter }
217215
connection.post(uri, body, headers)

app/services/discourse_translator/yandex.rb

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,14 +132,16 @@ def self.detect!(topic_or_post)
132132
end
133133
end
134134

135-
def self.translate!(topic_or_post)
136-
detected_lang = detect(topic_or_post)
135+
def self.translate!(translatable, target_locale_sym = I18n.locale)
136+
detected_lang = detect(translatable)
137+
locale =
138+
SUPPORTED_LANG_MAPPING[target_locale_sym] || (raise I18n.t("translator.not_supported"))
137139

138-
save_translation(topic_or_post) do
140+
save_translation(translatable) do
139141
query =
140142
default_query.merge(
141143
"lang" => "#{detected_lang}-#{locale}",
142-
"text" => text_for_translation(topic_or_post),
144+
"text" => text_for_translation(translatable),
143145
"format" => "html",
144146
)
145147

@@ -158,10 +160,6 @@ def self.translate_supported?(detected_lang, target_lang)
158160

159161
private
160162

161-
def self.locale
162-
SUPPORTED_LANG_MAPPING[I18n.locale] || (raise I18n.t("translator.not_supported"))
163-
end
164-
165163
def self.post(uri, body, headers = {})
166164
Excon.post(uri, body: body, headers: headers)
167165
end
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# frozen_string_literal: true
2+
3+
class HyphenateTranslatorLocales < ActiveRecord::Migration[7.2]
4+
BATCH_SIZE = 1000
5+
6+
def up
7+
normalize_table("discourse_translator_topic_translations", "locale")
8+
normalize_table("discourse_translator_post_translations", "locale")
9+
normalize_table("discourse_translator_topic_locales", "detected_locale")
10+
normalize_table("discourse_translator_post_locales", "detected_locale")
11+
end
12+
13+
def down
14+
raise ActiveRecord::IrreversibleMigration
15+
end
16+
17+
private
18+
19+
def normalize_table(table_name, column)
20+
start_id = 0
21+
loop do
22+
result = DB.query_single(<<~SQL, start_id: start_id, batch_size: BATCH_SIZE)
23+
WITH batch AS (
24+
SELECT id
25+
FROM #{table_name}
26+
WHERE #{column} LIKE '%\\_%' ESCAPE '\\'
27+
AND id > :start_id
28+
ORDER BY id
29+
LIMIT :batch_size
30+
)
31+
UPDATE #{table_name}
32+
SET #{column} = REGEXP_REPLACE(#{column}, '_', '-')
33+
WHERE id IN (SELECT id FROM batch)
34+
RETURNING id
35+
SQL
36+
37+
break if result.empty?
38+
start_id = result.max
39+
end
40+
end
41+
end
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# frozen_string_literal: true
2+
3+
require_relative "../../db/migrate/20250210171147_hyphenate_translator_locales"
4+
5+
module DiscourseTranslator
6+
describe HyphenateTranslatorLocales do
7+
let(:migration) { described_class.new }
8+
9+
it "normalizes underscores to dashes in all translator tables" do
10+
topic = Fabricate(:topic)
11+
post = Fabricate(:post, topic: topic)
12+
13+
DiscourseTranslator::TopicTranslation.create!(topic:, locale: "en_GB", translation: "test")
14+
DiscourseTranslator::PostTranslation.create!(post:, locale: "fr_CA", translation: "test")
15+
DiscourseTranslator::TopicLocale.create!(topic:, detected_locale: "es_MX")
16+
DiscourseTranslator::PostLocale.create!(post:, detected_locale: "pt_BR")
17+
18+
migration.up
19+
20+
expect(DiscourseTranslator::TopicTranslation.last.locale).to eq("en-GB")
21+
expect(DiscourseTranslator::PostTranslation.last.locale).to eq("fr-CA")
22+
expect(DiscourseTranslator::TopicLocale.last.detected_locale).to eq("es-MX")
23+
expect(DiscourseTranslator::PostLocale.last.detected_locale).to eq("pt-BR")
24+
end
25+
26+
it "handles multiple batches" do
27+
described_class.const_set(:BATCH_SIZE, 2)
28+
29+
topic = Fabricate(:topic)
30+
post = Fabricate(:post, topic: topic)
31+
32+
5.times { |i| post.set_translation("en_#{i}", "test#{i}") }
33+
5.times { |i| post.set_translation("en-#{i + 10}", "test#{i}") }
34+
5.times { |i| post.set_translation("en_#{i + 20}", "test#{i}") }
35+
36+
migration.up
37+
38+
locales = DiscourseTranslator::PostTranslation.pluck(:locale)
39+
expect(locales).to all(match(/\A[a-z]+-\d+\z/))
40+
expect(locales).not_to include(match(/_/))
41+
end
42+
43+
it "only updates records containing underscores" do
44+
topic = Fabricate(:topic)
45+
46+
topic.set_translation("en_GB", "test")
47+
DiscourseTranslator::TopicTranslation.create!(
48+
topic: topic,
49+
locale: "fr_CA",
50+
translation: "test2",
51+
)
52+
53+
expect { migration.up }.to change {
54+
DiscourseTranslator::TopicTranslation.where("locale LIKE ? ESCAPE '\\'", "%\\_%").count
55+
}.from(1).to(0)
56+
57+
expect(DiscourseTranslator::TopicTranslation.pluck(:locale)).to contain_exactly(
58+
"en-GB",
59+
"fr-CA",
60+
)
61+
end
62+
end
63+
end

0 commit comments

Comments
 (0)