Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions app/services/discourse_translator/amazon.rb
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def self.access_token_key
end

def self.detect(topic_or_post)
text = truncate get_text(topic_or_post)
text = truncate text_for_detection(topic_or_post)

return if text.blank?

Expand All @@ -133,7 +133,7 @@ def self.translate(topic_or_post)
result =
client.translate_text(
{
text: truncate(get_text(topic_or_post)),
text: truncate(text_for_translation(topic_or_post)),
source_language_code: "auto",
target_language_code: SUPPORTED_LANG_MAPPING[I18n.locale],
},
Expand Down
22 changes: 17 additions & 5 deletions app/services/discourse_translator/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ class ProblemCheckedTranslationError < TranslatorError
end

class Base
DETECTION_CHAR_LIMIT = 1000

def self.key_prefix
"#{PLUGIN_NAME}:".freeze
end
Expand Down Expand Up @@ -61,17 +63,27 @@ def self.get_text(topic_or_post)
end
end

# Decides whether content detected as +detected_lang+ can be translated into
# the current I18n locale by this provider.
#
# @param detected_lang [String] language code detected for the content
# @return [Boolean] false when the current locale has no entry in
#   SUPPORTED_LANG_MAPPING, or when the detected language already equals the
#   locale's mapped language; true otherwise
# @raise [NotImplementedError] when the receiver has no SUPPORTED_LANG_MAPPING
def self.language_supported?(detected_lang)
  mapping_defined = self.const_defined?(:SUPPORTED_LANG_MAPPING)
  raise NotImplementedError unless mapping_defined

  # Provider language code for the locale the reader is currently using.
  target_lang = const_get(:SUPPORTED_LANG_MAPPING)[I18n.locale]
  return false if target_lang.nil?

  # Nothing to translate when the content is already in the target language.
  detected_lang != target_lang
end

private

# Removes every <img> element from an HTML fragment and returns the remaining
# markup, so image tags are not sent along with the text used for language
# detection.
#
# @param detection_text [String] HTML fragment to clean
# @return [String] the fragment serialized back to HTML without <img> nodes
def self.strip_img_for_detection(detection_text)
  Nokogiri::HTML::DocumentFragment
    .parse(detection_text)
    .tap { |fragment| fragment.css("img").remove }
    .to_html
end

def self.language_supported?(detected_lang)
raise NotImplementedError unless self.const_defined?(:SUPPORTED_LANG_MAPPING)
supported_lang = const_get(:SUPPORTED_LANG_MAPPING)
return false if supported_lang[I18n.locale].nil?
detected_lang != supported_lang[I18n.locale]
# Builds the text sent to a provider's language-detection endpoint: the
# topic/post text cut to DETECTION_CHAR_LIMIT characters (no omission marker
# appended), with <img> elements removed afterwards.
#
# NOTE(review): this is defined below a bare `private`, which does not apply
# to `def self.` methods, so it remains publicly callable.
#
# @param topic_or_post [Topic, Post] record whose text should be detected
# @return [String] truncated, image-free text
def self.text_for_detection(topic_or_post)
  truncated = get_text(topic_or_post).truncate(DETECTION_CHAR_LIMIT, omission: nil)
  strip_img_for_detection(truncated)
end

# Builds the text sent to a provider's translation endpoint: the topic/post
# text cut to the `max_characters_per_translation` site setting, with no
# omission marker appended.
#
# @param topic_or_post [Topic, Post] record whose text should be translated
# @return [String] text limited to the configured number of characters
def self.text_for_translation(topic_or_post)
  full_text = get_text(topic_or_post)
  full_text.truncate(SiteSetting.max_characters_per_translation, omission: nil)
end
end
end
7 changes: 2 additions & 5 deletions app/services/discourse_translator/google.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ class Google < Base
TRANSLATE_URI = "https://www.googleapis.com/language/translate/v2".freeze
DETECT_URI = "https://www.googleapis.com/language/translate/v2/detect".freeze
SUPPORT_URI = "https://www.googleapis.com/language/translate/v2/languages".freeze
MAXLENGTH = 5000

# Hash which maps Discourse's locale code to Google Translate's locale code found in
# https://cloud.google.com/translate/docs/languages
Expand Down Expand Up @@ -76,11 +75,9 @@ def self.access_token
end

def self.detect(topic_or_post)
detection_text = get_text(topic_or_post).truncate(MAXLENGTH, omission: nil)
detection_text = strip_img_for_detection(detection_text)
topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= result(
DETECT_URI,
q: detection_text,
q: text_for_detection(topic_or_post),
)[
"detections"
][
Expand Down Expand Up @@ -115,7 +112,7 @@ def self.translate(topic_or_post)
res =
result(
TRANSLATE_URI,
q: get_text(topic_or_post).truncate(MAXLENGTH, omission: nil),
q: text_for_translation(topic_or_post),
source: detected_lang,
target: SUPPORTED_LANG_MAPPING[I18n.locale],
)
Expand Down
10 changes: 2 additions & 8 deletions app/services/discourse_translator/libre_translate.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

module DiscourseTranslator
class LibreTranslate < Base
MAXLENGTH = 5000

SUPPORTED_LANG_MAPPING = {
en: "en",
en_GB: "en",
Expand Down Expand Up @@ -84,11 +82,7 @@ def self.detect(topic_or_post)
res =
result(
detect_uri,
q:
ActionController::Base
.helpers
.strip_tags(get_text(topic_or_post))
.truncate(MAXLENGTH, omission: nil),
q: ActionController::Base.helpers.strip_tags(text_for_detection(topic_or_post)),
)

if !res.empty?
Expand Down Expand Up @@ -116,7 +110,7 @@ def self.translate(topic_or_post)
res =
result(
translate_uri,
q: get_text(topic_or_post).truncate(MAXLENGTH, omission: nil),
q: text_for_translation(topic_or_post),
source: detected_lang,
target: SUPPORTED_LANG_MAPPING[I18n.locale],
format: "html",
Expand Down
6 changes: 2 additions & 4 deletions app/services/discourse_translator/microsoft.rb
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,7 @@ def self.access_token_key

def self.detect(topic_or_post)
topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= begin
text = get_text(topic_or_post).truncate(LENGTH_LIMIT, omission: nil)

body = [{ "Text" => text }].to_json
body = [{ "Text" => text_for_detection(topic_or_post) }].to_json

uri = URI(detect_endpoint)
uri.query = URI.encode_www_form(self.default_query)
Expand All @@ -125,7 +123,7 @@ def self.translate(topic_or_post)
from_custom_fields(topic_or_post) do
query = default_query.merge("from" => detected_lang, "to" => locale, "textType" => "html")

body = [{ "Text" => get_text(topic_or_post) }].to_json
body = [{ "Text" => text_for_translation(topic_or_post) }].to_json

uri = URI(translate_endpoint)
uri.query = URI.encode_www_form(query)
Expand Down
4 changes: 2 additions & 2 deletions app/services/discourse_translator/yandex.rb
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def self.access_token

def self.detect(topic_or_post)
topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= begin
query = default_query.merge("text" => get_text(topic_or_post))
query = default_query.merge("text" => text_for_detection(topic_or_post))

uri = URI(DETECT_URI)
uri.query = URI.encode_www_form(query)
Expand All @@ -149,7 +149,7 @@ def self.translate(topic_or_post)
query =
default_query.merge(
"lang" => "#{detected_lang}-#{locale}",
"text" => get_text(topic_or_post),
"text" => text_for_translation(topic_or_post),
"format" => "html",
)

Expand Down
1 change: 1 addition & 0 deletions config/locales/server.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ en:
translator_azure_region: "Azure Region"
translator_google_api_key: "Google API Key"
translator_yandex_api_key: "Yandex API Key"
max_characters_per_translation: "The maximum number of characters that can be sent for translation. If content is longer than this, text will be truncated. Note that each provider also has its own limits."
max_translations_per_minute: "The number of translations per minute a regular user can perform."
translator_libretranslate_endpoint: "LibreTranslate API Endpoint"
translator_libretranslate_api_key: "LibreTranslate API Key"
Expand Down
3 changes: 3 additions & 0 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ discourse_translator:
default: ''
translator_libretranslate_api_key:
default: ''
max_characters_per_translation:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need a description in server.en.yml?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I should add one for consistency.

default: 5000
client: true
max_translations_per_minute:
default: 3
restrict_translation_by_group:
Expand Down
31 changes: 31 additions & 0 deletions spec/services/base_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,35 @@ class EmptyTranslator < DiscourseTranslator::Base
expect(TestTranslator.language_supported?("pt")).to eq(false)
end
end

# Covers Base.text_for_detection: <img> removal and the DETECTION_CHAR_LIMIT cap.
describe ".text_for_detection" do
  fab!(:post)

  # Evaluated lazily, so each example sets post.cooked before this runs.
  subject(:detection_text) { DiscourseTranslator::Base.text_for_detection(post) }

  it "strips img tags" do
    post.cooked = "<img src='http://example.com/image.png' />"

    expect(detection_text).to eq("")
  end

  it "truncates to DETECTION_CHAR_LIMIT of 1000" do
    post.cooked = "a" * 1001

    expect(detection_text.length).to eq(1000)
  end

  it "returns the text if it's less than DETECTION_CHAR_LIMIT" do
    short_text = "a" * 999
    post.cooked = short_text

    expect(detection_text).to eq(short_text)
  end
end

# Covers Base.text_for_translation: output is capped at the site setting.
describe ".text_for_translation" do
  fab!(:post)

  it "truncates to max_characters_per_translation" do
    limit = SiteSetting.max_characters_per_translation
    # One character over the limit, so truncation must occur.
    post.cooked = "a" * (limit + 1)

    translation_text = DiscourseTranslator::Base.text_for_translation(post)

    expect(translation_text.length).to eq(limit)
  end
end
end
43 changes: 40 additions & 3 deletions spec/services/google_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@
end
end

it "should truncate string to 5000 characters" do
length = 6000
it "should truncate string to 1000 characters" do
length = 2000
post.cooked = rand(36**length).to_s(36)
detected_lang = "en"

request_url = "#{DiscourseTranslator::Google::DETECT_URI}"
body = {
q: post.cooked.truncate(DiscourseTranslator::Google::MAXLENGTH, omission: nil),
q: post.cooked.truncate(DiscourseTranslator::Google::DETECTION_CHAR_LIMIT, omission: nil),
key: api_key,
}

Expand Down Expand Up @@ -166,5 +166,42 @@

expect { described_class.translate(post) }.to raise_error DiscourseTranslator::TranslatorError
end

# Verifies that translate sends at most max_characters_per_translation
# characters of the post to Google's translate endpoint.
it "truncates text for translation to max_characters_per_translation setting" do
# Use a limit well below the 100-character post so truncation is observable.
SiteSetting.max_characters_per_translation = 50
post.cooked = "a" * 100
# Pre-populate the detected language custom field with "de".
post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] = "de"
post.save_custom_fields
# Expected request body: `q` must be the truncated text, not the full post.
body = {
q: post.cooked.truncate(SiteSetting.max_characters_per_translation, omission: nil),
source: "de",
target: "en",
key: api_key,
}

translated_text = "hur dur hur dur"
# Strict expectation: exactly one POST to TRANSLATE_URI with the truncated body.
Excon
.expects(:post)
.with(
DiscourseTranslator::Google::TRANSLATE_URI,
body: URI.encode_www_form(body),
headers: {
"Content-Type" => "application/x-www-form-urlencoded",
"Referer" => "http://test.localhost",
},
)
.returns(
mock_response.new(
200,
%{ { "data": { "translations": [ { "translatedText": "#{translated_text}" } ] } } },
),
)
.once
# Unconstrained POST stub returning a languages payload that lists "de".
# NOTE(review): presumably serves a supported-languages lookup made during
# translate — confirm against the Google translator implementation.
Excon.expects(:post).returns(
mock_response.new(200, %{ { "data": { "languages": [ { "language": "de" }] } } }),
)

expect(described_class.translate(post)).to eq(["de", translated_text])
end
end
end