Skip to content

Commit 1e050bd

Browse files
committed
clean up whitespace event logic
1 parent b2bb68c commit 1e050bd

File tree

1 file changed

+4
-8
lines changed

1 file changed

+4
-8
lines changed

lib/boilerpipe/sax/html_content_handler.rb

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@ def initialize
1010
@label_stacks = []
1111
@tag_actions = ::Boilerpipe::SAX::TagActionMap.tag_actions
1212
@tag_level = 0
13-
@sb_last_was_whitespace = false
1413
@text_buffer = ''
1514
@token_buffer = ''
1615
@offset_blocks = 0
@@ -61,7 +60,6 @@ def characters(text)
6160
# add a single space if the block was only whitespace
6261
if text.empty?
6362
append_space
64-
@last_event = :WHITESPACE
6563
return
6664
end
6765

@@ -72,7 +70,6 @@ def characters(text)
7270
append_text(text)
7371
append_space if ended_with_whitespace
7472

75-
@last_event = :CHARACTERS
7673
end
7774

7875
def end_element(name)
@@ -112,7 +109,7 @@ def flush_block
112109
when 0
113110
return
114111
when 1
115-
clear_buffers if @sb_last_was_whitespace
112+
clear_buffers if @last_event == :WHITESPACE
116113
return
117114
end
118115

@@ -230,16 +227,15 @@ def add_text_block(text_block)
230227

231228
# append space if last character wasn't already one
232229
def append_space
233-
return if @sb_last_was_whitespace
234-
235-
@sb_last_was_whitespace = true
230+
return if @last_event == :WHITESPACE
231+
@last_event = :WHITESPACE
236232

237233
@text_buffer << ' '
238234
@token_buffer << ' '
239235
end
240236

241237
def append_text(text)
242-
@sb_last_was_whitespace = false
238+
@last_event = :CHARACTERS
243239
@text_buffer << text
244240
@token_buffer << text
245241
end

0 commit comments

Comments (0)