diff --git a/app/services/discourse_translator/amazon.rb b/app/services/discourse_translator/amazon.rb index 3c9c3dea..9e123109 100644 --- a/app/services/discourse_translator/amazon.rb +++ b/app/services/discourse_translator/amazon.rb @@ -108,7 +108,7 @@ def self.access_token_key end def self.detect(topic_or_post) - text = truncate get_text(topic_or_post) + text = truncate text_for_detection(topic_or_post) return if text.blank? @@ -133,7 +133,7 @@ def self.translate(topic_or_post) result = client.translate_text( { - text: truncate(get_text(topic_or_post)), + text: truncate(text_for_translation(topic_or_post)), source_language_code: "auto", target_language_code: SUPPORTED_LANG_MAPPING[I18n.locale], }, diff --git a/app/services/discourse_translator/base.rb b/app/services/discourse_translator/base.rb index 3f04e47a..d9d9bcc4 100644 --- a/app/services/discourse_translator/base.rb +++ b/app/services/discourse_translator/base.rb @@ -10,6 +10,8 @@ class ProblemCheckedTranslationError < TranslatorError end class Base + DETECTION_CHAR_LIMIT = 1000 + def self.key_prefix "#{PLUGIN_NAME}:".freeze end @@ -61,17 +63,27 @@ def self.get_text(topic_or_post) end end + def self.language_supported?(detected_lang) + raise NotImplementedError unless self.const_defined?(:SUPPORTED_LANG_MAPPING) + supported_lang = const_get(:SUPPORTED_LANG_MAPPING) + return false if supported_lang[I18n.locale].nil? + detected_lang != supported_lang[I18n.locale] + end + + private + def self.strip_img_for_detection(detection_text) html_doc = Nokogiri::HTML::DocumentFragment.parse(detection_text) html_doc.css("img").remove html_doc.to_html end - def self.language_supported?(detected_lang) - raise NotImplementedError unless self.const_defined?(:SUPPORTED_LANG_MAPPING) - supported_lang = const_get(:SUPPORTED_LANG_MAPPING) - return false if supported_lang[I18n.locale].nil? - detected_lang != supported_lang[I18n.locale] + def self.text_for_detection(topic_or_post) + strip_img_for_detection(get_text(topic_or_post).truncate(DETECTION_CHAR_LIMIT, omission: nil)) + end + + def self.text_for_translation(topic_or_post) + get_text(topic_or_post).truncate(SiteSetting.max_characters_per_translation, omission: nil) end end end diff --git a/app/services/discourse_translator/google.rb b/app/services/discourse_translator/google.rb index 75d112a8..7002f80a 100644 --- a/app/services/discourse_translator/google.rb +++ b/app/services/discourse_translator/google.rb @@ -8,7 +8,6 @@ class Google < Base TRANSLATE_URI = "https://www.googleapis.com/language/translate/v2".freeze DETECT_URI = "https://www.googleapis.com/language/translate/v2/detect".freeze SUPPORT_URI = "https://www.googleapis.com/language/translate/v2/languages".freeze - MAXLENGTH = 5000 # Hash which maps Discourse's locale code to Google Translate's locale code found in # https://cloud.google.com/translate/docs/languages @@ -76,11 +75,9 @@ def self.access_token end def self.detect(topic_or_post) - detection_text = get_text(topic_or_post).truncate(MAXLENGTH, omission: nil) - detection_text = strip_img_for_detection(detection_text) topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= result( DETECT_URI, - q: detection_text, + q: text_for_detection(topic_or_post), )[ "detections" ][ @@ -115,7 +112,7 @@ def self.translate(topic_or_post) res = result( TRANSLATE_URI, - q: get_text(topic_or_post).truncate(MAXLENGTH, omission: nil), + q: text_for_translation(topic_or_post), source: detected_lang, target: SUPPORTED_LANG_MAPPING[I18n.locale], ) diff --git a/app/services/discourse_translator/libre_translate.rb b/app/services/discourse_translator/libre_translate.rb index 56e2a59d..751c8e7d 100644 --- a/app/services/discourse_translator/libre_translate.rb +++ b/app/services/discourse_translator/libre_translate.rb @@ -5,8 +5,6 @@ module DiscourseTranslator class LibreTranslate < Base - MAXLENGTH = 5000 - SUPPORTED_LANG_MAPPING = { en: "en", en_GB: "en", @@ -84,11 +82,7 @@ def self.detect(topic_or_post) res = result( detect_uri, - q: - ActionController::Base - .helpers - .strip_tags(get_text(topic_or_post)) - .truncate(MAXLENGTH, omission: nil), + q: ActionController::Base.helpers.strip_tags(text_for_detection(topic_or_post)), ) if !res.empty? @@ -116,7 +110,7 @@ def self.translate(topic_or_post) res = result( translate_uri, - q: get_text(topic_or_post).truncate(MAXLENGTH, omission: nil), + q: text_for_translation(topic_or_post), source: detected_lang, target: SUPPORTED_LANG_MAPPING[I18n.locale], format: "html", diff --git a/app/services/discourse_translator/microsoft.rb b/app/services/discourse_translator/microsoft.rb index 3261a956..ca0fc6a2 100644 --- a/app/services/discourse_translator/microsoft.rb +++ b/app/services/discourse_translator/microsoft.rb @@ -96,9 +96,7 @@ def self.access_token_key def self.detect(topic_or_post) topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= begin - text = get_text(topic_or_post).truncate(LENGTH_LIMIT, omission: nil) - - body = [{ "Text" => text }].to_json + body = [{ "Text" => text_for_detection(topic_or_post) }].to_json uri = URI(detect_endpoint) uri.query = URI.encode_www_form(self.default_query) @@ -125,7 +123,7 @@ def self.translate(topic_or_post) from_custom_fields(topic_or_post) do query = default_query.merge("from" => detected_lang, "to" => locale, "textType" => "html") - body = [{ "Text" => get_text(topic_or_post) }].to_json + body = [{ "Text" => text_for_translation(topic_or_post) }].to_json uri = URI(translate_endpoint) uri.query = URI.encode_www_form(query) diff --git a/app/services/discourse_translator/yandex.rb b/app/services/discourse_translator/yandex.rb index 11b444d6..73144031 100644 --- a/app/services/discourse_translator/yandex.rb +++ b/app/services/discourse_translator/yandex.rb @@ -125,7 +125,7 @@ def self.access_token def self.detect(topic_or_post) topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= begin - query = default_query.merge("text" => get_text(topic_or_post)) + query = default_query.merge("text" => text_for_detection(topic_or_post)) uri = URI(DETECT_URI) uri.query = URI.encode_www_form(query) @@ -149,7 +149,7 @@ def self.translate(topic_or_post) query = default_query.merge( "lang" => "#{detected_lang}-#{locale}", - "text" => get_text(topic_or_post), + "text" => text_for_translation(topic_or_post), "format" => "html", ) diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index bf8f3144..135e457d 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -6,6 +6,7 @@ en: translator_azure_region: "Azure Region" translator_google_api_key: "Google API Key" translator_yandex_api_key: "Yandex API Key" + max_characters_per_translation: "The maximum number of characters that can be sent for translation. If content is longer than this, text will be truncated. Note that each provider also has their own limits." max_translations_per_minute: "The number of translations per minute a regular user can perform." translator_libretranslate_endpoint: "LibreTranslate API Endpoint" translator_libretranslate_api_key: "LibreTranslate API Key" diff --git a/config/settings.yml b/config/settings.yml index e2e97fea..b0912bec 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -87,6 +87,9 @@ discourse_translator: default: '' translator_libretranslate_api_key: default: '' + max_characters_per_translation: + default: 5000 + client: true max_translations_per_minute: default: 3 restrict_translation_by_group: diff --git a/spec/services/base_spec.rb b/spec/services/base_spec.rb index 5c189374..bb806edb 100644 --- a/spec/services/base_spec.rb +++ b/spec/services/base_spec.rb @@ -32,4 +32,35 @@ class EmptyTranslator < DiscourseTranslator::Base expect(TestTranslator.language_supported?("pt")).to eq(false) end end + + describe ".text_for_detection" do + fab!(:post) + + it "strips img tags" do + post.cooked = "" + expect(DiscourseTranslator::Base.text_for_detection(post)).to eq("") + end + + it "truncates to DETECTION_CHAR_LIMIT of 1000" do + post.cooked = "a" * 1001 + expect(DiscourseTranslator::Base.text_for_detection(post).length).to eq(1000) + end + + it "returns the text if it's less than DETECTION_CHAR_LIMIT" do + text = "a" * 999 + post.cooked = text + expect(DiscourseTranslator::Base.text_for_detection(post)).to eq(text) + end + end + + describe ".text_for_translation" do + fab!(:post) + + it "truncates to max_characters_per_translation" do + post.cooked = "a" * (SiteSetting.max_characters_per_translation + 1) + expect(DiscourseTranslator::Base.text_for_translation(post).length).to eq( + SiteSetting.max_characters_per_translation, + ) + end + end end diff --git a/spec/services/google_spec.rb b/spec/services/google_spec.rb index 94aa75ea..06073af2 100644 --- a/spec/services/google_spec.rb +++ b/spec/services/google_spec.rb @@ -43,14 +43,14 @@ end end - it "should truncate string to 5000 characters" do - length = 6000 + it "should truncate string to 1000 characters" do + length = 2000 post.cooked = rand(36**length).to_s(36) detected_lang = "en" request_url = "#{DiscourseTranslator::Google::DETECT_URI}" body = { - q: post.cooked.truncate(DiscourseTranslator::Google::MAXLENGTH, omission: nil), + q: post.cooked.truncate(DiscourseTranslator::Google::DETECTION_CHAR_LIMIT, omission: nil), key: api_key, } @@ -166,5 +166,42 @@ expect { described_class.translate(post) }.to raise_error DiscourseTranslator::TranslatorError end + + it "truncates text for translation to max_characters_per_translation setting" do + SiteSetting.max_characters_per_translation = 50 + post.cooked = "a" * 100 + post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] = "de" + post.save_custom_fields + body = { + q: post.cooked.truncate(SiteSetting.max_characters_per_translation, omission: nil), + source: "de", + target: "en", + key: api_key, + } + + translated_text = "hur dur hur dur" + Excon + .expects(:post) + .with( + DiscourseTranslator::Google::TRANSLATE_URI, + body: URI.encode_www_form(body), + headers: { + "Content-Type" => "application/x-www-form-urlencoded", + "Referer" => "http://test.localhost", + }, + ) + .returns( + mock_response.new( + 200, + %{ { "data": { "translations": [ { "translatedText": "#{translated_text}" } ] } } }, + ), + ) + .once + Excon.expects(:post).returns( + mock_response.new(200, %{ { "data": { "languages": [ { "language": "de" }] } } }), + ) + + expect(described_class.translate(post)).to eq(["de", translated_text]) + end end end