diff --git a/app/services/discourse_translator/base.rb b/app/services/discourse_translator/base.rb
index cf8caa3a..3f04e47a 100644
--- a/app/services/discourse_translator/base.rb
+++ b/app/services/discourse_translator/base.rb
@@ -61,6 +61,17 @@ def self.get_text(topic_or_post)
       end
     end
 
+    # Removes every <img> element from an HTML fragment so that image markup
+    # is not part of the text handed to language detection.
+    #
+    # @param detection_text [String] HTML fragment to clean
+    # @return [String] the fragment serialized back to HTML, without <img> elements
+    def self.strip_img_for_detection(detection_text)
+      html_doc = Nokogiri::HTML::DocumentFragment.parse(detection_text)
+      html_doc.css("img").remove
+      html_doc.to_html
+    end
+
     def self.language_supported?(detected_lang)
       raise NotImplementedError unless self.const_defined?(:SUPPORTED_LANG_MAPPING)
       supported_lang = const_get(:SUPPORTED_LANG_MAPPING)
diff --git a/app/services/discourse_translator/google.rb b/app/services/discourse_translator/google.rb
index 415a8228..75d112a8 100644
--- a/app/services/discourse_translator/google.rb
+++ b/app/services/discourse_translator/google.rb
@@ -76,9 +76,11 @@ def self.access_token
     end
 
     def self.detect(topic_or_post)
+      # Strip <img> markup before truncating so MAXLENGTH is spent on text, and
+      # build the query inside `||=` so nothing is parsed when a value is stored.
       topic_or_post.custom_fields[DiscourseTranslator::DETECTED_LANG_CUSTOM_FIELD] ||= result(
         DETECT_URI,
-        q: get_text(topic_or_post).truncate(MAXLENGTH, omission: nil),
+        q: strip_img_for_detection(get_text(topic_or_post)).truncate(MAXLENGTH, omission: nil),
       )[
         "detections"
       ][
diff --git a/spec/services/google_spec.rb b/spec/services/google_spec.rb
index b55060c0..94aa75ea 100644
--- a/spec/services/google_spec.rb
+++ b/spec/services/google_spec.rb
@@ -19,7 +19,7 @@
   end
 
   describe ".detect" do
-    let(:post) { Fabricate(:post) }
+    fab!(:post)
 
     it "should store the detected language in a custom field" do
       detected_lang = "en"
@@ -74,6 +74,34 @@
 
       expect(described_class.detect(post)).to eq(detected_lang)
     end
+
+    it "strips img tags from detection text" do
+      post.cooked = "there are some words to be said<img src='http://example.com/image.png'>"
+      detected_lang = "en"
+
+      request_url = "#{DiscourseTranslator::Google::DETECT_URI}"
+      body = { q: "there are some words to be said", key: api_key }
+
+      Excon
+        .expects(:post)
+        .with(
+          request_url,
+          body: URI.encode_www_form(body),
+          headers: {
+            "Content-Type" => "application/x-www-form-urlencoded",
+            "Referer" => "http://test.localhost",
+          },
+        )
+        .returns(
+          mock_response.new(
+            200,
+            %{ { "data": { "detections": [ [ { "language": "#{detected_lang}", "isReliable": false, "confidence": 0.18397073 } ] ] } } },
+          ),
+        )
+        .once
+
+      expect(described_class.detect(post)).to eq(detected_lang)
+    end
   end
 
   describe ".translate_supported?" do