diff --git a/app/models/concerns/discourse_translator/translatable.rb b/app/models/concerns/discourse_translator/translatable.rb index 02948ecb..9b451f8b 100644 --- a/app/models/concerns/discourse_translator/translatable.rb +++ b/app/models/concerns/discourse_translator/translatable.rb @@ -23,6 +23,7 @@ def set_translation(locale, text) end def translation_for(locale) + locale = locale.to_s.gsub("_", "-") translations.find_by(locale: locale)&.translation end @@ -30,6 +31,17 @@ def detected_locale content_locale&.detected_locale end + def locale_matches?(locale, normalise_region: true) + return false if detected_locale.blank? || locale.blank? + + # locales can be :en :en_US "en" "en-US" + detected = detected_locale.gsub("_", "-") + target = locale.to_s.gsub("_", "-") + detected = detected.split("-").first if normalise_region + target = target.split("-").first if normalise_region + detected == target + end + private def clear_translations diff --git a/app/services/discourse_translator/amazon.rb b/app/services/discourse_translator/amazon.rb index 4a6b3771..314ec9b2 100644 --- a/app/services/discourse_translator/amazon.rb +++ b/app/services/discourse_translator/amazon.rb @@ -123,23 +123,23 @@ def self.detect!(topic_or_post) end end - def self.translate!(topic_or_post) - detected_lang = detect(topic_or_post) + def self.translate!(translatable, target_locale_sym = I18n.locale) + detected_lang = detect(translatable) - save_translation(topic_or_post) do + save_translation(translatable) do begin client.translate_text( { - text: truncate(text_for_translation(topic_or_post)), + text: truncate(text_for_translation(translatable)), source_language_code: "auto", - target_language_code: SUPPORTED_LANG_MAPPING[I18n.locale], + target_language_code: SUPPORTED_LANG_MAPPING[target_locale_sym], }, ) rescue Aws::Translate::Errors::UnsupportedLanguagePairException raise I18n.t( "translator.failed", source_locale: detected_lang, - target_locale: I18n.locale, + target_locale: target_locale_sym, ) end end diff --git a/app/services/discourse_translator/base.rb b/app/services/discourse_translator/base.rb index d714825f..44909fd1 100644 --- a/app/services/discourse_translator/base.rb +++ b/app/services/discourse_translator/base.rb @@ -28,21 +28,23 @@ def self.cache_key # If the translation does not exist yet, it will be translated first via the API then stored. # If the detected language is the same as the target language, the original text will be returned. # @param translatable [Post|Topic] - def self.translate(translatable) + def self.translate(translatable, target_locale_sym = I18n.locale) return if text_for_translation(translatable).blank? detected_lang = detect(translatable) - return detected_lang, get_text(translatable) if (detected_lang&.to_s == I18n.locale.to_s) + if translatable.locale_matches?(target_locale_sym) + return detected_lang, get_text(translatable) + end - existing_translation = get_translation(translatable) - return detected_lang, existing_translation if existing_translation.present? + translation = translatable.translation_for(target_locale_sym) + return detected_lang, translation if translation.present? - unless translate_supported?(detected_lang, I18n.locale) + unless translate_supported?(detected_lang, target_locale_sym) raise TranslatorError.new( I18n.t( "translator.failed", source_locale: detected_lang, - target_locale: I18n.locale, + target_locale: target_locale_sym, ), ) end @@ -52,7 +54,7 @@ def self.translate(translatable) # Subclasses must implement this method to translate the text of a post or topic # then use the save_translation method to store the translated text. # @param translatable [Post|Topic] - def self.translate!(translatable) + def self.translate!(translatable, target_locale_sym = I18n.locale) raise "Not Implemented" end @@ -75,10 +77,6 @@ def self.access_token raise "Not Implemented" end - def self.get_translation(translatable) - translatable.translation_for(I18n.locale) - end - def self.save_translation(translatable) translation = yield translatable.set_translation(I18n.locale, translation) diff --git a/app/services/discourse_translator/discourse_ai.rb b/app/services/discourse_translator/discourse_ai.rb index 8537accd..b057e9bd 100644 --- a/app/services/discourse_translator/discourse_ai.rb +++ b/app/services/discourse_translator/discourse_ai.rb @@ -19,16 +19,14 @@ def self.detect!(topic_or_post) end end - def self.translate(topic_or_post) + def self.translate!(translatable, target_locale_sym = I18n.locale) return unless required_settings_enabled - - detected_lang = detect(topic_or_post) - translated_text = - save_translation(topic_or_post) do - ::DiscourseAi::Translator.new(text_for_translation(topic_or_post), I18n.locale).translate - end - - [detected_lang, translated_text] + save_translation(translatable) do + ::DiscourseAi::Translator.new( + text_for_translation(translatable), + target_locale_sym, + ).translate + end end private diff --git a/app/services/discourse_translator/google.rb b/app/services/discourse_translator/google.rb index 55e65a1d..4d409484 100644 --- a/app/services/discourse_translator/google.rb +++ b/app/services/discourse_translator/google.rb @@ -89,15 +89,15 @@ def self.translate_supported?(source, target) res["languages"].any? { |obj| obj["language"] == source } end - def self.translate!(topic_or_post) - detected_locale = detect(topic_or_post) - save_translation(topic_or_post) do + def self.translate!(translatable, target_locale_sym = I18n.locale) + detected_locale = detect(translatable) + save_translation(translatable) do res = result( TRANSLATE_URI, - q: text_for_translation(topic_or_post), + q: text_for_translation(translatable), source: detected_locale, - target: SUPPORTED_LANG_MAPPING[I18n.locale], + target: SUPPORTED_LANG_MAPPING[target_locale_sym], ) res["translations"][0]["translatedText"] end diff --git a/app/services/discourse_translator/libre_translate.rb b/app/services/discourse_translator/libre_translate.rb index f464c9c3..0e641f20 100644 --- a/app/services/discourse_translator/libre_translate.rb +++ b/app/services/discourse_translator/libre_translate.rb @@ -95,16 +95,16 @@ def self.translate_supported?(source, target) res.any? { |obj| obj["code"] == source } && res.any? { |obj| obj["code"] == lang } end - def self.translate!(topic_or_post) - detected_lang = detect(topic_or_post) + def self.translate!(translatable, target_locale_sym = I18n.locale) + detected_lang = detect(translatable) - save_translation(topic_or_post) do + save_translation(translatable) do res = result( translate_uri, - q: text_for_translation(topic_or_post), + q: text_for_translation(translatable), source: detected_lang, - target: SUPPORTED_LANG_MAPPING[I18n.locale], + target: SUPPORTED_LANG_MAPPING[target_locale], format: "html", ) res["translatedText"] diff --git a/app/services/discourse_translator/microsoft.rb b/app/services/discourse_translator/microsoft.rb index 4a4d1653..e70295bf 100644 --- a/app/services/discourse_translator/microsoft.rb +++ b/app/services/discourse_translator/microsoft.rb @@ -157,17 +157,19 @@ def self.detect!(topic_or_post) end end - def self.translate!(topic_or_post) - detected_lang = detect(topic_or_post) + def self.translate!(translatable, target_locale_sym = I18n.locale) + detected_lang = detect(translatable) - if text_for_translation(topic_or_post).length > LENGTH_LIMIT + if text_for_translation(translatable).length > LENGTH_LIMIT raise TranslatorError.new(I18n.t("translator.too_long")) end + locale = + SUPPORTED_LANG_MAPPING[target_locale_sym] || (raise I18n.t("translator.not_supported")) - save_translation(topic_or_post) do + save_translation(translatable) do query = default_query.merge("from" => detected_lang, "to" => locale, "textType" => "html") - body = [{ "Text" => text_for_translation(topic_or_post) }].to_json + body = [{ "Text" => text_for_translation(translatable) }].to_json uri = URI(translate_endpoint) uri.query = URI.encode_www_form(query) @@ -208,10 +210,6 @@ def self.custom_endpoint? SiteSetting.translator_azure_custom_subdomain.present? end - def self.locale - SUPPORTED_LANG_MAPPING[I18n.locale] || (raise I18n.t("translator.not_supported")) - end - def self.post(uri, body, headers = {}) connection = Faraday.new { |f| f.adapter FinalDestination::FaradayAdapter } connection.post(uri, body, headers) diff --git a/app/services/discourse_translator/yandex.rb b/app/services/discourse_translator/yandex.rb index cadeed61..2ea91b95 100644 --- a/app/services/discourse_translator/yandex.rb +++ b/app/services/discourse_translator/yandex.rb @@ -132,14 +132,16 @@ def self.detect!(topic_or_post) end end - def self.translate!(topic_or_post) - detected_lang = detect(topic_or_post) + def self.translate!(translatable, target_locale_sym = I18n.locale) + detected_lang = detect(translatable) + locale = + SUPPORTED_LANG_MAPPING[target_locale_sym] || (raise I18n.t("translator.not_supported")) - save_translation(topic_or_post) do + save_translation(translatable) do query = default_query.merge( "lang" => "#{detected_lang}-#{locale}", - "text" => text_for_translation(topic_or_post), + "text" => text_for_translation(translatable), "format" => "html", ) @@ -158,10 +160,6 @@ def self.translate_supported?(detected_lang, target_lang) private - def self.locale - SUPPORTED_LANG_MAPPING[I18n.locale] || (raise I18n.t("translator.not_supported")) - end - def self.post(uri, body, headers = {}) Excon.post(uri, body: body, headers: headers) end diff --git a/db/migrate/20250210171147_hyphenate_translator_locales.rb b/db/migrate/20250210171147_hyphenate_translator_locales.rb new file mode 100644 index 00000000..598792a0 --- /dev/null +++ b/db/migrate/20250210171147_hyphenate_translator_locales.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +class HyphenateTranslatorLocales < ActiveRecord::Migration[7.2] + BATCH_SIZE = 1000 + + def up + normalize_table("discourse_translator_topic_translations", "locale") + normalize_table("discourse_translator_post_translations", "locale") + normalize_table("discourse_translator_topic_locales", "detected_locale") + normalize_table("discourse_translator_post_locales", "detected_locale") + end + + def down + raise ActiveRecord::IrreversibleMigration + end + + private + + def normalize_table(table_name, column) + start_id = 0 + loop do + result = DB.query_single(<<~SQL, start_id: start_id, batch_size: BATCH_SIZE) + WITH batch AS ( + SELECT id + FROM #{table_name} + WHERE #{column} LIKE '%\\_%' ESCAPE '\\' + AND id > :start_id + ORDER BY id + LIMIT :batch_size + ) + UPDATE #{table_name} + SET #{column} = REGEXP_REPLACE(#{column}, '_', '-') + WHERE id IN (SELECT id FROM batch) + RETURNING id + SQL + + break if result.empty? + start_id = result.max + end + end +end diff --git a/spec/models/hyphenate_locales_spec.rb b/spec/models/hyphenate_locales_spec.rb new file mode 100644 index 00000000..e14f4c3e --- /dev/null +++ b/spec/models/hyphenate_locales_spec.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +require_relative "../../db/migrate/20250210171147_hyphenate_translator_locales" + +module DiscourseTranslator + describe HyphenateTranslatorLocales do + let(:migration) { described_class.new } + + it "normalizes underscores to dashes in all translator tables" do + topic = Fabricate(:topic) + post = Fabricate(:post, topic: topic) + + DiscourseTranslator::TopicTranslation.create!(topic:, locale: "en_GB", translation: "test") + DiscourseTranslator::PostTranslation.create!(post:, locale: "fr_CA", translation: "test") + DiscourseTranslator::TopicLocale.create!(topic:, detected_locale: "es_MX") + DiscourseTranslator::PostLocale.create!(post:, detected_locale: "pt_BR") + + migration.up + + expect(DiscourseTranslator::TopicTranslation.last.locale).to eq("en-GB") + expect(DiscourseTranslator::PostTranslation.last.locale).to eq("fr-CA") + expect(DiscourseTranslator::TopicLocale.last.detected_locale).to eq("es-MX") + expect(DiscourseTranslator::PostLocale.last.detected_locale).to eq("pt-BR") + end + + it "handles multiple batches" do + described_class.const_set(:BATCH_SIZE, 2) + + topic = Fabricate(:topic) + post = Fabricate(:post, topic: topic) + + 5.times { |i| post.set_translation("en_#{i}", "test#{i}") } + 5.times { |i| post.set_translation("en-#{i + 10}", "test#{i}") } + 5.times { |i| post.set_translation("en_#{i + 20}", "test#{i}") } + + migration.up + + locales = DiscourseTranslator::PostTranslation.pluck(:locale) + expect(locales).to all(match(/\A[a-z]+-\d+\z/)) + expect(locales).not_to include(match(/_/)) + end + + it "only updates records containing underscores" do + topic = Fabricate(:topic) + + topic.set_translation("en_GB", "test") + DiscourseTranslator::TopicTranslation.create!( + topic: topic, + locale: "fr_CA", + translation: "test2", + ) + + expect { migration.up }.to change { + DiscourseTranslator::TopicTranslation.where("locale LIKE ? ESCAPE '\\'", "%\\_%").count + }.from(1).to(0) + + expect(DiscourseTranslator::TopicTranslation.pluck(:locale)).to contain_exactly( + "en-GB", + "fr-CA", + ) + end + end +end diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb index d88832ba..5fea78fc 100644 --- a/spec/models/topic_spec.rb +++ b/spec/models/topic_spec.rb @@ -63,5 +63,37 @@ expect(topic.content_locale.detected_locale).to eq("en-US") end end + + describe "#locale_matches?" do + it "returns false when detected locale is blank" do + expect(topic.locale_matches?("en-US")).to eq(false) + end + + it "returns false when locale is blank" do + topic.set_detected_locale("en-US") + expect(topic.locale_matches?(nil)).to eq(false) + end + + [:en, "en", "en-US", :en_US, "en-GB", "en_GB", :en_GB].each do |locale| + it "returns true when matching normalised #{locale} to \"en\"" do + topic.set_detected_locale("en") + expect(topic.locale_matches?(locale)).to eq(true) + end + end + + ["en-GB", "en_GB", :en_GB].each do |locale| + it "returns true when matching #{locale} to \"en_GB\"" do + topic.set_detected_locale("en_GB") + expect(topic.locale_matches?(locale, normalise_region: false)).to eq(true) + end + end + + [:en, "en", "en-US", :en_US].each do |locale| + it "returns false when matching #{locale} to \"en_GB\"" do + topic.set_detected_locale("en_GB") + expect(topic.locale_matches?(locale, normalise_region: false)).to eq(false) + end + end + end end end diff --git a/spec/services/google_spec.rb b/spec/services/google_spec.rb index 01d92553..f6d7d8ba 100644 --- a/spec/services/google_spec.rb +++ b/spec/services/google_spec.rb @@ -112,8 +112,8 @@ it "should pass through strings already in target language" do lang = I18n.locale - described_class.expects(:detect).returns(lang) - expect(described_class.translate(topic)).to eq([lang, "This title is in english"]) + topic.set_detected_locale(lang) + expect(described_class.translate(topic)).to eq(["en", "This title is in english"]) end end