Skip to content

Commit 7ea37f4

Browse files
committed
clean up whitespace event logic
1 parent 7f3e5e0 commit 7ea37f4

File tree

1 file changed

+4
-8
lines changed

1 file changed

+4
-8
lines changed

lib/boilerpipe/sax/html_content_handler.rb

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ def initialize
1010
@label_stacks = []
1111
@tag_actions = ::Boilerpipe::SAX::TagActionMap.tag_actions
1212
@tag_level = 0
13-
@sb_last_was_whitespace = false
1413
@text_buffer = ''
1514
@token_buffer = ''
1615
@offset_blocks = 0
@@ -61,7 +60,6 @@ def characters(text)
6160
# add a single space if the block was only whitespace
6261
if text.empty?
6362
append_space
64-
@last_event = :WHITESPACE
6563
return
6664
end
6765

@@ -72,7 +70,6 @@ def characters(text)
7270
append_text(text)
7371
append_space if ended_with_whitespace
7472

75-
@last_event = :CHARACTERS
7673
end
7774

7875
def end_element(name)
@@ -106,7 +103,7 @@ def flush_block
106103
when 0
107104
return
108105
when 1
109-
clear_buffers if @sb_last_was_whitespace
106+
clear_buffers if @last_event == :WHITESPACE
110107
return
111108
end
112109

@@ -224,16 +221,15 @@ def add_text_block(text_block)
224221

225222
# append space if last character wasn't already one
226223
def append_space
227-
return if @sb_last_was_whitespace
228-
229-
@sb_last_was_whitespace = true
224+
return if @last_event == :WHITESPACE
225+
@last_event = :WHITESPACE
230226

231227
@text_buffer << ' '
232228
@token_buffer << ' '
233229
end
234230

235231
def append_text(text)
236-
@sb_last_was_whitespace = false
232+
@last_event = :CHARACTERS
237233
@text_buffer << text
238234
@token_buffer << text
239235
end

0 commit comments

Comments
 (0)