Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions lib/boilerpipe/filters/block_proximity_fusion.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,25 +32,25 @@ def process(doc)
blocks_to_remove = []

blocks.each do |tb|
if tb.is_not_content?
# if tb.is_not_content?
# prev_block = tb
# next
# end
#
block_distance = tb.offset_blocks_start - prev_block.offset_blocks_end - 1

ok = block_distance <= @max_blocks_distance
ok = false if ok && @content_only && (prev_block.is_not_content? || tb.is_not_content?)
ok = false if ok && @same_tag_level_only && prev_block.tag_level != tb.tag_level

if ok
prev_block.merge_next(tb)
blocks_to_remove << tb
else
prev_block = tb
next
end

diff_blocks = tb.offset_blocks_start - prev_block.offset_blocks_end - 1
if diff_blocks <= @max_blocks_distance
ok = true
ok = false if (prev_block.is_not_content? || tb.is_not_content?) && @content_only
ok = false if ok && prev_block.tag_level != tb.tag_level && @same_tag_level_only

if ok
prev_block.merge_next(tb)
blocks_to_remove << tb
else
prev_block = tb
end
end
end

doc.replace_text_blocks!(text_blocks - blocks_to_remove)
doc
end
Expand Down
10 changes: 5 additions & 5 deletions lib/boilerpipe/sax/tag_action_map.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ class TagActionMap
def self.tag_actions
labels = ::Boilerpipe::Labels
{
STYLE: TagActions::IgnorableElement.new,
SCRIPT: TagActions::IgnorableElement.new,
OPTION: TagActions::IgnorableElement.new,
OBJECT: TagActions::IgnorableElement.new,
EMBED: TagActions::IgnorableElement.new,
APPLET: TagActions::IgnorableElement.new,
EMBED: TagActions::IgnorableElement.new,
LINK: TagActions::IgnorableElement.new,
OPTION: TagActions::IgnorableElement.new,
OBJECT: TagActions::IgnorableElement.new,
SCRIPT: TagActions::IgnorableElement.new,
STYLE: TagActions::IgnorableElement.new,

A: TagActions::AnchorText.new,
BODY: TagActions::Body.new,
Expand Down