Skip to content

Commit 6ec1171

Browse files
committed
FEATURE: Add max characters per translation setting
1 parent e142757 commit 6ec1171

File tree

9 files changed

+101
-29
lines changed

9 files changed

+101
-29
lines changed

app/services/discourse_translator/amazon.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def self.access_token_key
108108
end
109109

110110
def self.detect(topic_or_post)
111-
text = truncate get_text(topic_or_post)
111+
text = truncate text_for_detection(topic_or_post)
112112

113113
return if text.blank?
114114

@@ -133,7 +133,7 @@ def self.translate(topic_or_post)
133133
result =
134134
client.translate_text(
135135
{
136-
text: truncate(get_text(topic_or_post)),
136+
text: truncate(text_for_translation(topic_or_post)),
137137
source_language_code: "auto",
138138
target_language_code: SUPPORTED_LANG_MAPPING[I18n.locale],
139139
},

app/services/discourse_translator/base.rb

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ class ProblemCheckedTranslationError < TranslatorError
1010
end
1111

1212
class Base
13+
DETECTION_CHAR_LIMIT = 1000
14+
1315
def self.key_prefix
1416
"#{PLUGIN_NAME}:".freeze
1517
end
@@ -61,17 +63,27 @@ def self.get_text(topic_or_post)
6163
end
6264
end
6365

66+
def self.language_supported?(detected_lang)
67+
raise NotImplementedError unless self.const_defined?(:SUPPORTED_LANG_MAPPING)
68+
supported_lang = const_get(:SUPPORTED_LANG_MAPPING)
69+
return false if supported_lang[I18n.locale].nil?
70+
detected_lang != supported_lang[I18n.locale]
71+
end
72+
73+
private
74+
6475
def self.strip_img_for_detection(detection_text)
6576
html_doc = Nokogiri::HTML::DocumentFragment.parse(detection_text)
6677
html_doc.css("img").remove
6778
html_doc.to_html
6879
end
6980

70-
def self.language_supported?(detected_lang)
71-
raise NotImplementedError unless self.const_defined?(:SUPPORTED_LANG_MAPPING)
72-
supported_lang = const_get(:SUPPORTED_LANG_MAPPING)
73-
return false if supported_lang[I18n.locale].nil?
74-
detected_lang != supported_lang[I18n.locale]
81+
def self.text_for_detection(topic_or_post)
82+
strip_img_for_detection(get_text(topic_or_post).truncate(DETECTION_CHAR_LIMIT, omission: nil))
83+
end
84+
85+
def self.text_for_translation(topic_or_post)
86+
get_text(topic_or_post).truncate(SiteSetting.max_characters_per_translation, omission: nil)
7587
end
7688
end
7789
end

app/services/discourse_translator/google.rb

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ class Google < Base
88
TRANSLATE_URI = "https://www.googleapis.com/language/translate/v2".freeze
99
DETECT_URI = "https://www.googleapis.com/language/translate/v2/detect".freeze
1010
SUPPORT_URI = "https://www.googleapis.com/language/translate/v2/languages".freeze
11-
MAXLENGTH = 5000
1211

1312
# Hash which maps Discourse's locale code to Google Translate's locale code found in
1413
# https://cloud.google.com/translate/docs/languages
@@ -76,11 +75,9 @@ def self.access_token
7675
end
7776

7877
def self.detect(topic_or_post)
79-
detection_text = get_text(topic_or_post).truncate(MAXLENGTH, omission: nil)
80-
detection_text = strip_img_for_detection(detection_text)
8178
topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= result(
8279
DETECT_URI,
83-
q: detection_text,
80+
q: text_for_detection(topic_or_post),
8481
)[
8582
"detections"
8683
][
@@ -115,7 +112,7 @@ def self.translate(topic_or_post)
115112
res =
116113
result(
117114
TRANSLATE_URI,
118-
q: get_text(topic_or_post).truncate(MAXLENGTH, omission: nil),
115+
q: text_for_translation(topic_or_post),
119116
source: detected_lang,
120117
target: SUPPORTED_LANG_MAPPING[I18n.locale],
121118
)

app/services/discourse_translator/libre_translate.rb

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55

66
module DiscourseTranslator
77
class LibreTranslate < Base
8-
MAXLENGTH = 5000
9-
108
SUPPORTED_LANG_MAPPING = {
119
en: "en",
1210
en_GB: "en",
@@ -84,11 +82,7 @@ def self.detect(topic_or_post)
8482
res =
8583
result(
8684
detect_uri,
87-
q:
88-
ActionController::Base
89-
.helpers
90-
.strip_tags(get_text(topic_or_post))
91-
.truncate(MAXLENGTH, omission: nil),
85+
q: ActionController::Base.helpers.strip_tags(text_for_detection(topic_or_post)),
9286
)
9387

9488
if !res.empty?
@@ -116,7 +110,7 @@ def self.translate(topic_or_post)
116110
res =
117111
result(
118112
translate_uri,
119-
q: get_text(topic_or_post).truncate(MAXLENGTH, omission: nil),
113+
q: text_for_translation(topic_or_post),
120114
source: detected_lang,
121115
target: SUPPORTED_LANG_MAPPING[I18n.locale],
122116
format: "html",

app/services/discourse_translator/microsoft.rb

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,7 @@ def self.access_token_key
9696

9797
def self.detect(topic_or_post)
9898
topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= begin
99-
text = get_text(topic_or_post).truncate(LENGTH_LIMIT, omission: nil)
100-
101-
body = [{ "Text" => text }].to_json
99+
body = [{ "Text" => text_for_detection(topic_or_post) }].to_json
102100

103101
uri = URI(detect_endpoint)
104102
uri.query = URI.encode_www_form(self.default_query)
@@ -125,7 +123,7 @@ def self.translate(topic_or_post)
125123
from_custom_fields(topic_or_post) do
126124
query = default_query.merge("from" => detected_lang, "to" => locale, "textType" => "html")
127125

128-
body = [{ "Text" => get_text(topic_or_post) }].to_json
126+
body = [{ "Text" => text_for_translation(topic_or_post) }].to_json
129127

130128
uri = URI(translate_endpoint)
131129
uri.query = URI.encode_www_form(query)

app/services/discourse_translator/yandex.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def self.access_token
125125

126126
def self.detect(topic_or_post)
127127
topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= begin
128-
query = default_query.merge("text" => get_text(topic_or_post))
128+
query = default_query.merge("text" => text_for_detection(topic_or_post))
129129

130130
uri = URI(DETECT_URI)
131131
uri.query = URI.encode_www_form(query)
@@ -149,7 +149,7 @@ def self.translate(topic_or_post)
149149
query =
150150
default_query.merge(
151151
"lang" => "#{detected_lang}-#{locale}",
152-
"text" => get_text(topic_or_post),
152+
"text" => text_for_translation(topic_or_post),
153153
"format" => "html",
154154
)
155155

config/settings.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ discourse_translator:
8787
default: ''
8888
translator_libretranslate_api_key:
8989
default: ''
90+
max_characters_per_translation:
91+
default: 5000
92+
client: true
9093
max_translations_per_minute:
9194
default: 3
9295
restrict_translation_by_group:

spec/services/base_spec.rb

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,35 @@ class EmptyTranslator < DiscourseTranslator::Base
3232
expect(TestTranslator.language_supported?("pt")).to eq(false)
3333
end
3434
end
35+
36+
describe ".text_for_detection" do
37+
fab!(:post)
38+
39+
it "strips img tags" do
40+
post.cooked = "<img src='http://example.com/image.png' />"
41+
expect(DiscourseTranslator::Base.text_for_detection(post)).to eq("")
42+
end
43+
44+
it "truncates to DETECTION_CHAR_LIMIT of 1000" do
45+
post.cooked = "a" * 1001
46+
expect(DiscourseTranslator::Base.text_for_detection(post).length).to eq(1000)
47+
end
48+
49+
it "returns the text if it's less than DETECTION_CHAR_LIMIT" do
50+
text = "a" * 999
51+
post.cooked = text
52+
expect(DiscourseTranslator::Base.text_for_detection(post)).to eq(text)
53+
end
54+
end
55+
56+
describe ".text_for_translation" do
57+
fab!(:post)
58+
59+
it "truncates to max_characters_per_translation" do
60+
post.cooked = "a" * (SiteSetting.max_characters_per_translation + 1)
61+
expect(DiscourseTranslator::Base.text_for_translation(post).length).to eq(
62+
SiteSetting.max_characters_per_translation,
63+
)
64+
end
65+
end
3566
end

spec/services/google_spec.rb

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,14 @@
4343
end
4444
end
4545

46-
it "should truncate string to 5000 characters" do
47-
length = 6000
46+
it "should truncate string to 1000 characters" do
47+
length = 2000
4848
post.cooked = rand(36**length).to_s(36)
4949
detected_lang = "en"
5050

5151
request_url = "#{DiscourseTranslator::Google::DETECT_URI}"
5252
body = {
53-
q: post.cooked.truncate(DiscourseTranslator::Google::MAXLENGTH, omission: nil),
53+
q: post.cooked.truncate(DiscourseTranslator::Google::DETECTION_CHAR_LIMIT, omission: nil),
5454
key: api_key,
5555
}
5656

@@ -166,5 +166,42 @@
166166

167167
expect { described_class.translate(post) }.to raise_error DiscourseTranslator::TranslatorError
168168
end
169+
170+
it "truncates text for translation to max_characters_per_translation setting" do
171+
SiteSetting.max_characters_per_translation = 50
172+
post.cooked = "a" * 100
173+
post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] = "de"
174+
post.save_custom_fields
175+
body = {
176+
q: post.cooked.truncate(SiteSetting.max_characters_per_translation, omission: nil),
177+
source: "de",
178+
target: "en",
179+
key: api_key,
180+
}
181+
182+
translated_text = "hur dur hur dur"
183+
Excon
184+
.expects(:post)
185+
.with(
186+
DiscourseTranslator::Google::TRANSLATE_URI,
187+
body: URI.encode_www_form(body),
188+
headers: {
189+
"Content-Type" => "application/x-www-form-urlencoded",
190+
"Referer" => "http://test.localhost",
191+
},
192+
)
193+
.returns(
194+
mock_response.new(
195+
200,
196+
%{ { "data": { "translations": [ { "translatedText": "#{translated_text}" } ] } } },
197+
),
198+
)
199+
.once
200+
Excon.expects(:post).returns(
201+
mock_response.new(200, %{ { "data": { "languages": [ { "language": "de" }] } } }),
202+
)
203+
204+
expect(described_class.translate(post)).to eq(["de", translated_text])
205+
end
169206
end
170207
end

0 commit comments

Comments
 (0)