Skip to content

Commit ab71ee9

Browse files
authored
FIX: Also strip anchor tags when detecting language (#187)
When a user is @-mentioned, there is a chance that the language is detected from the username rather than from the post content itself. Stripping the mention's anchor tag before detection helps produce more accurate language detection.
1 parent fdba172 commit ab71ee9

File tree

2 files changed

+26
-2
lines changed

2 files changed

+26
-2
lines changed

app/services/discourse_translator/base.rb

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,17 @@ def self.language_supported?(detected_lang)
7272

7373
private
7474

75-
def self.strip_img_for_detection(detection_text)
75+
def self.strip_tags_for_detection(detection_text)
7676
html_doc = Nokogiri::HTML::DocumentFragment.parse(detection_text)
7777
html_doc.css("img").remove
78+
html_doc.css("a.mention,a.lightbox").remove
7879
html_doc.to_html
7980
end
8081

8182
def self.text_for_detection(topic_or_post)
82-
strip_img_for_detection(get_text(topic_or_post).truncate(DETECTION_CHAR_LIMIT, omission: nil))
83+
strip_tags_for_detection(
84+
get_text(topic_or_post).truncate(DETECTION_CHAR_LIMIT, omission: nil),
85+
)
8386
end
8487

8588
def self.text_for_translation(topic_or_post)

spec/services/base_spec.rb

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,27 @@ class EmptyTranslator < DiscourseTranslator::Base
4141
expect(DiscourseTranslator::Base.text_for_detection(post)).to eq("")
4242
end
4343

44+
it "strips @ mention anchor tags" do
45+
post.cooked = "<a class='mention' href='/u/cat' >cat</a>"
46+
expect(DiscourseTranslator::Base.text_for_detection(post)).to eq("")
47+
end
48+
49+
it "strips lightbox anchor tags" do
50+
post.cooked = "<a class='lightbox' href='http://cloudfront.net/image.png' />"
51+
expect(DiscourseTranslator::Base.text_for_detection(post)).to eq("")
52+
end
53+
54+
it "leaves other anchor tags alone" do
55+
cooked = <<~HTML
56+
<p>
57+
<a href="http://cat.com/image.png"></a>
58+
<a class="derp" href="http://cat.com/image.png"></a>
59+
</p>
60+
HTML
61+
post.cooked = cooked
62+
expect(DiscourseTranslator::Base.text_for_detection(post)).to eq(cooked)
63+
end
64+
4465
it "truncates to DETECTION_CHAR_LIMIT of 1000" do
4566
post.cooked = "a" * 1001
4667
expect(DiscourseTranslator::Base.text_for_detection(post).length).to eq(1000)

0 commit comments

Comments
 (0)