Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions app/services/discourse_translator/amazon.rb
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def self.access_token_key
end

def self.detect(topic_or_post)
text = truncate get_text(topic_or_post)
text = truncate text_for_detection(topic_or_post)

return if text.blank?

Expand All @@ -133,7 +133,7 @@ def self.translate(topic_or_post)
result =
client.translate_text(
{
text: truncate(get_text(topic_or_post)),
text: truncate(text_for_translation(topic_or_post)),
source_language_code: "auto",
target_language_code: SUPPORTED_LANG_MAPPING[I18n.locale],
},
Expand Down
22 changes: 17 additions & 5 deletions app/services/discourse_translator/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ class ProblemCheckedTranslationError < TranslatorError
end

class Base
DETECTION_CHAR_LIMIT = 1000

# Returns the frozen prefix string built from PLUGIN_NAME followed by a colon.
def self.key_prefix
  prefix = "#{PLUGIN_NAME}:"
  prefix.freeze
end
Expand Down Expand Up @@ -61,17 +63,27 @@ def self.get_text(topic_or_post)
end
end

# Compares a detected language code against the provider code mapped to the
# current I18n locale.
#
# Returns false when SUPPORTED_LANG_MAPPING has no entry for the current
# locale; otherwise returns whether +detected_lang+ differs from that entry.
#
# Raises NotImplementedError when the receiver does not define
# SUPPORTED_LANG_MAPPING.
def self.language_supported?(detected_lang)
  raise NotImplementedError unless const_defined?(:SUPPORTED_LANG_MAPPING)

  locale_lang = const_get(:SUPPORTED_LANG_MAPPING)[I18n.locale]
  !locale_lang.nil? && detected_lang != locale_lang
end

private

# Parses +detection_text+ as an HTML fragment, removes every <img> element,
# and returns the remaining markup serialized back to an HTML string.
def self.strip_img_for_detection(detection_text)
  Nokogiri::HTML::DocumentFragment
    .parse(detection_text)
    .tap { |fragment| fragment.css("img").remove }
    .to_html
end

def self.language_supported?(detected_lang)
raise NotImplementedError unless self.const_defined?(:SUPPORTED_LANG_MAPPING)
supported_lang = const_get(:SUPPORTED_LANG_MAPPING)
return false if supported_lang[I18n.locale].nil?
detected_lang != supported_lang[I18n.locale]
# Builds the text used for language detection: the text from get_text is
# truncated to DETECTION_CHAR_LIMIT characters first, and <img> elements are
# stripped from the truncated result afterwards.
def self.text_for_detection(topic_or_post)
  # NOTE(review): ActiveSupport's String#truncate appears to fall back to "..."
  # when :omission is nil, so the result may end in an ellipsis — confirm
  # whether `omission: ""` was intended.
  truncated_text = get_text(topic_or_post).truncate(DETECTION_CHAR_LIMIT, omission: nil)
  strip_img_for_detection(truncated_text)
end

# Builds the text sent for translation: the text from get_text, truncated to
# the SiteSetting.max_characters_per_translation limit.
def self.text_for_translation(topic_or_post)
  full_text = get_text(topic_or_post)
  full_text.truncate(SiteSetting.max_characters_per_translation, omission: nil)
end
end
end
7 changes: 2 additions & 5 deletions app/services/discourse_translator/google.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ class Google < Base
TRANSLATE_URI = "https://www.googleapis.com/language/translate/v2".freeze
DETECT_URI = "https://www.googleapis.com/language/translate/v2/detect".freeze
SUPPORT_URI = "https://www.googleapis.com/language/translate/v2/languages".freeze
MAXLENGTH = 5000

# Hash which maps Discourse's locale code to Google Translate's locale code found in
# https://cloud.google.com/translate/docs/languages
Expand Down Expand Up @@ -76,11 +75,9 @@ def self.access_token
end

def self.detect(topic_or_post)
detection_text = get_text(topic_or_post).truncate(MAXLENGTH, omission: nil)
detection_text = strip_img_for_detection(detection_text)
topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= result(
DETECT_URI,
q: detection_text,
q: text_for_detection(topic_or_post),
)[
"detections"
][
Expand Down Expand Up @@ -115,7 +112,7 @@ def self.translate(topic_or_post)
res =
result(
TRANSLATE_URI,
q: get_text(topic_or_post).truncate(MAXLENGTH, omission: nil),
q: text_for_translation(topic_or_post),
source: detected_lang,
target: SUPPORTED_LANG_MAPPING[I18n.locale],
)
Expand Down
10 changes: 2 additions & 8 deletions app/services/discourse_translator/libre_translate.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

module DiscourseTranslator
class LibreTranslate < Base
MAXLENGTH = 5000

SUPPORTED_LANG_MAPPING = {
en: "en",
en_GB: "en",
Expand Down Expand Up @@ -84,11 +82,7 @@ def self.detect(topic_or_post)
res =
result(
detect_uri,
q:
ActionController::Base
.helpers
.strip_tags(get_text(topic_or_post))
.truncate(MAXLENGTH, omission: nil),
q: ActionController::Base.helpers.strip_tags(text_for_detection(topic_or_post)),
)

if !res.empty?
Expand Down Expand Up @@ -116,7 +110,7 @@ def self.translate(topic_or_post)
res =
result(
translate_uri,
q: get_text(topic_or_post).truncate(MAXLENGTH, omission: nil),
q: text_for_translation(topic_or_post),
source: detected_lang,
target: SUPPORTED_LANG_MAPPING[I18n.locale],
format: "html",
Expand Down
6 changes: 2 additions & 4 deletions app/services/discourse_translator/microsoft.rb
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,7 @@ def self.access_token_key

def self.detect(topic_or_post)
topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= begin
text = get_text(topic_or_post).truncate(LENGTH_LIMIT, omission: nil)

body = [{ "Text" => text }].to_json
body = [{ "Text" => text_for_detection(topic_or_post) }].to_json

uri = URI(detect_endpoint)
uri.query = URI.encode_www_form(self.default_query)
Expand All @@ -125,7 +123,7 @@ def self.translate(topic_or_post)
from_custom_fields(topic_or_post) do
query = default_query.merge("from" => detected_lang, "to" => locale, "textType" => "html")

body = [{ "Text" => get_text(topic_or_post) }].to_json
body = [{ "Text" => text_for_translation(topic_or_post) }].to_json

uri = URI(translate_endpoint)
uri.query = URI.encode_www_form(query)
Expand Down
4 changes: 2 additions & 2 deletions app/services/discourse_translator/yandex.rb
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def self.access_token

def self.detect(topic_or_post)
topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= begin
query = default_query.merge("text" => get_text(topic_or_post))
query = default_query.merge("text" => text_for_detection(topic_or_post))

uri = URI(DETECT_URI)
uri.query = URI.encode_www_form(query)
Expand All @@ -149,7 +149,7 @@ def self.translate(topic_or_post)
query =
default_query.merge(
"lang" => "#{detected_lang}-#{locale}",
"text" => get_text(topic_or_post),
"text" => text_for_translation(topic_or_post),
"format" => "html",
)

Expand Down
3 changes: 3 additions & 0 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ discourse_translator:
default: ''
translator_libretranslate_api_key:
default: ''
max_characters_per_translation:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need a description in server.en.yml?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I should add one for consistency.

default: 5000
client: true
max_translations_per_minute:
default: 3
restrict_translation_by_group:
Expand Down
31 changes: 31 additions & 0 deletions spec/services/base_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,35 @@ class EmptyTranslator < DiscourseTranslator::Base
expect(TestTranslator.language_supported?("pt")).to eq(false)
end
end

# Covers Base.text_for_detection: <img> removal and the 1000-character cap.
describe ".text_for_detection" do
  fab!(:post)

  # A post whose cooked HTML is a single <img> leaves nothing to detect.
  it "strips img tags" do
    post.cooked = "<img src='http://example.com/image.png' />"
    detection_text = DiscourseTranslator::Base.text_for_detection(post)
    expect(detection_text).to eq("")
  end

  # One character over the limit must come back at exactly the limit.
  it "truncates to DETECTION_CHAR_LIMIT of 1000" do
    post.cooked = "a" * 1001
    detection_text = DiscourseTranslator::Base.text_for_detection(post)
    expect(detection_text.length).to eq(1000)
  end

  # Text under the limit is returned unchanged.
  it "returns the text if it's less than DETECTION_CHAR_LIMIT" do
    short_text = "a" * 999
    post.cooked = short_text
    detection_text = DiscourseTranslator::Base.text_for_detection(post)
    expect(detection_text).to eq(short_text)
  end
end

# Covers Base.text_for_translation: output is capped at the site setting.
describe ".text_for_translation" do
  fab!(:post)

  # One character over the configured limit must come back at exactly the limit.
  it "truncates to max_characters_per_translation" do
    limit = SiteSetting.max_characters_per_translation
    post.cooked = "a" * (limit + 1)
    translation_text = DiscourseTranslator::Base.text_for_translation(post)
    expect(translation_text.length).to eq(limit)
  end
end
end
43 changes: 40 additions & 3 deletions spec/services/google_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@
end
end

it "should truncate string to 5000 characters" do
length = 6000
it "should truncate string to 1000 characters" do
length = 2000
post.cooked = rand(36**length).to_s(36)
detected_lang = "en"

request_url = "#{DiscourseTranslator::Google::DETECT_URI}"
body = {
q: post.cooked.truncate(DiscourseTranslator::Google::MAXLENGTH, omission: nil),
q: post.cooked.truncate(DiscourseTranslator::Google::DETECTION_CHAR_LIMIT, omission: nil),
key: api_key,
}

Expand Down Expand Up @@ -166,5 +166,42 @@

expect { described_class.translate(post) }.to raise_error DiscourseTranslator::TranslatorError
end

# Checks that translate posts only the first max_characters_per_translation
# characters of the cooked text to TRANSLATE_URI.
it "truncates text for translation to max_characters_per_translation setting" do
# Lower the limit to 50 and use a 100-character post so truncation is observable.
SiteSetting.max_characters_per_translation = 50
post.cooked = "a" * 100
# Store "de" as the detected language on the post; presumably this lets
# translate skip the detection request — confirm against Google.detect.
post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] = "de"
post.save_custom_fields
# Expected form body: the truncated text plus source/target languages and API key.
body = {
q: post.cooked.truncate(SiteSetting.max_characters_per_translation, omission: nil),
source: "de",
target: "en",
key: api_key,
}

translated_text = "hur dur hur dur"
# The translate request must be issued exactly once with the truncated body.
Excon
.expects(:post)
.with(
DiscourseTranslator::Google::TRANSLATE_URI,
body: URI.encode_www_form(body),
headers: {
"Content-Type" => "application/x-www-form-urlencoded",
"Referer" => "http://test.localhost",
},
)
.returns(
mock_response.new(
200,
%{ { "data": { "translations": [ { "translatedText": "#{translated_text}" } ] } } },
),
)
.once
# A second, unconstrained POST returns a language list containing "de";
# presumably this stubs the supported-languages lookup — confirm.
Excon.expects(:post).returns(
mock_response.new(200, %{ { "data": { "languages": [ { "language": "de" }] } } }),
)

expect(described_class.translate(post)).to eq(["de", translated_text])
end
end
end