Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 5631379

Browse files
committed
combine selectors
1 parent 75ca7cf commit 5631379

File tree

2 files changed

+14
-26
lines changed

2 files changed

+14
-26
lines changed

lib/translation/post_detection_text.rb

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,16 @@
33
module DiscourseAi
44
module Translation
55
class PostDetectionText
6+
SELECTORS = [
7+
"a.hashtag-cooked", # categories or tags are usually in the site's language
8+
"a.mention", # mentions are based on the mentioned user's name
9+
"aside.onebox", # onebox external content
10+
".lightbox-wrapper", # image captions
11+
"blockquote, aside.quote",
12+
"img.emoji",
13+
"code, pre",
14+
]
15+
616
def self.get_text(post)
717
return if post.blank?
818
cooked = post.cooked
@@ -11,29 +21,12 @@ def self.get_text(post)
1121
doc = Nokogiri::HTML5.fragment(cooked)
1222
original = doc.text.strip
1323

14-
# quotes and blockquotes
15-
doc.css("blockquote, aside.quote").remove
16-
# image captions
17-
doc.css(".lightbox-wrapper").remove
18-
24+
# elements matching these selectors are removed,
25+
# as they may not be in the user's language
26+
doc.css(*SELECTORS).remove
1927
necessary = doc.text.strip
2028

21-
# oneboxes (external content)
22-
doc.css("aside.onebox").remove
23-
# code blocks
24-
doc.css("code, pre").remove
25-
# hashtags
26-
doc.css("a.hashtag-cooked").remove
27-
# emoji
28-
doc.css("img.emoji").remove
29-
# mentions
30-
doc.css("a.mention").remove
31-
32-
preferred = doc.text.strip
33-
34-
return preferred if preferred.present?
35-
return necessary if necessary.present?
36-
original
29+
necessary.present? ? necessary : original
3730
end
3831
end
3932
end

spec/lib/translation/post_detection_text_spec.rb

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,6 @@
5858
expect(described_class.get_text(post)).to eq("Hello World")
5959
end
6060

61-
it "falls back to necessary text when preferred is empty" do
62-
post.cooked = '<aside class="quote">Quote</aside><a class="mention">@user</a>'
63-
expect(described_class.get_text(post)).to eq("@user")
64-
end
65-
6661
it "falls back to cooked when all filtering removes all content" do
6762
post.cooked = "<blockquote>Quote</blockquote>"
6863
expect(described_class.get_text(post)).to eq("Quote")

0 commit comments

Comments
 (0)