@@ -41,7 +41,7 @@ def add_child(self, child: 'HtmlSection') -> None:
41
41
42
42
def get_heading_text (self ) -> str :
43
43
"""Get the text of the heading for this section."""
44
- if self .heading_tag :
44
+ if self .heading_tag is not None :
45
45
try :
46
46
return self .heading_tag .get_text (strip = True )
47
47
except Exception :
@@ -56,7 +56,7 @@ def get_html(self) -> str:
56
56
57
57
try :
58
58
result = []
59
- if self .heading_tag :
59
+ if self .heading_tag is not None :
60
60
result .append (str (self .heading_tag ))
61
61
62
62
for item in self .content :
@@ -69,7 +69,7 @@ def get_html(self) -> str:
69
69
return self .html
70
70
except Exception as e :
71
71
# Fallback in case of error
72
- if self .heading_tag :
72
+ if self .heading_tag is not None :
73
73
return str (self .heading_tag )
74
74
return ""
75
75
@@ -97,7 +97,7 @@ def parse_html(html_content: str) -> Tuple[BeautifulSoup, HtmlSection]:
97
97
# First pass: identify all headings
98
98
all_headings = []
99
99
for element in body .find_all (['h1' , 'h2' , 'h3' , 'h4' , 'h5' , 'h6' ]):
100
- if element .name and re .match (r'h[1-6]$' , element .name ):
100
+ if element .name is not None and re .match (r'h[1-6]$' , element .name ):
101
101
level = int (element .name [1 ])
102
102
all_headings .append ((element , level ))
103
103
@@ -144,16 +144,16 @@ def parse_html(html_content: str) -> Tuple[BeautifulSoup, HtmlSection]:
144
144
current_section = root_section
145
145
146
146
for element in body .children :
147
- if not element or (isinstance (element , str ) and not element .strip ()):
147
+ if element is None or (isinstance (element , str ) and not element .strip ()):
148
148
continue
149
149
150
150
is_section_start = False
151
151
new_level = None
152
152
153
- if isinstance (element , Tag ) and element .name and re .match (r'h[1-6]$' , element .name ):
153
+ if isinstance (element , Tag ) and element .name is not None and re .match (r'h[1-6]$' , element .name ):
154
154
level = int (element .name [1 ])
155
155
for section in _flatten_sections (root_section ):
156
- if section .heading_tag and section .heading_tag == element :
156
+ if section .heading_tag is not None and section .heading_tag == element :
157
157
is_section_start = True
158
158
new_level = level
159
159
current_section = section
@@ -163,7 +163,7 @@ def parse_html(html_content: str) -> Tuple[BeautifulSoup, HtmlSection]:
163
163
current_section .add_content (element )
164
164
else :
165
165
for element in body .children :
166
- if element :
166
+ if element is not None :
167
167
root_section .add_content (element )
168
168
169
169
return soup , root_section
@@ -173,7 +173,7 @@ def parse_html(html_content: str) -> Tuple[BeautifulSoup, HtmlSection]:
173
173
root_section = HtmlSection ()
174
174
175
175
for element in soup .children :
176
- if element :
176
+ if element is not None :
177
177
root_section .add_content (element )
178
178
179
179
return soup , root_section
@@ -261,15 +261,15 @@ def identify_procedure_sections(soup: BeautifulSoup) -> list[dict]:
261
261
# Multiple ways to identify procedures
262
262
procedure_markers = []
263
263
for element in soup .find_all (string = lambda text : text and "Procedure" in text ):
264
- if element .parent and element .parent .name not in ('script' , 'style' ):
264
+ if element .parent is not None and element .parent .name not in ('script' , 'style' ):
265
265
procedure_markers .append (element )
266
266
267
267
ordered_lists = soup .find_all ('ol' )
268
268
269
269
processed_lists = set ()
270
270
271
271
for marker in procedure_markers :
272
- if not marker or not marker .parent :
272
+ if marker is None or marker .parent is None :
273
273
continue
274
274
275
275
ol = None
@@ -283,24 +283,24 @@ def identify_procedure_sections(soup: BeautifulSoup) -> list[dict]:
283
283
break
284
284
285
285
next_sibling = current .find_next_sibling ()
286
- if next_sibling and next_sibling .name == 'ol' :
286
+ if next_sibling is not None and next_sibling .name == 'ol' :
287
287
ol = next_sibling
288
288
break
289
289
290
290
ol_in_children = current .find ('ol' )
291
- if ol_in_children :
291
+ if ol_in_children is not None :
292
292
ol = ol_in_children
293
293
break
294
294
295
295
current = current .find_next ()
296
296
297
- if not ol or id (ol ) in processed_lists :
297
+ if ol is None or id (ol ) in processed_lists :
298
298
continue
299
299
300
300
heading = _find_closest_heading (marker .parent )
301
301
302
302
intro = []
303
- if heading :
303
+ if heading is not None :
304
304
current = heading .find_next ()
305
305
while current and current != marker .parent and current != ol :
306
306
if current .name not in ('script' , 'style' ):
@@ -342,7 +342,7 @@ def identify_procedure_sections(soup: BeautifulSoup) -> list[dict]:
342
342
343
343
# Find introduction elements
344
344
intro = []
345
- if heading :
345
+ if heading is not None :
346
346
current = heading .find_next ()
347
347
while current and current != ol :
348
348
if current .name not in ('script' , 'style' ):
@@ -364,7 +364,7 @@ def identify_procedure_sections(soup: BeautifulSoup) -> list[dict]:
364
364
break
365
365
366
366
# Add to procedures if it looks like a procedure
367
- if heading or marker or prerequisites :
367
+ if heading is not None or marker is not None or prerequisites is not None :
368
368
procedures .append ({
369
369
'heading' : heading ,
370
370
'intro' : intro ,
@@ -392,7 +392,7 @@ def _find_closest_heading(element: Tag) -> Optional[Tag]:
392
392
Returns:
393
393
The closest heading, or None if not found.
394
394
"""
395
- if not element :
395
+ if element is None :
396
396
return None
397
397
398
398
# Check previous siblings
@@ -407,7 +407,7 @@ def _find_closest_heading(element: Tag) -> Optional[Tag]:
407
407
return current
408
408
409
409
# Check parent's previous siblings
410
- if element .parent :
410
+ if element .parent is not None :
411
411
return _find_closest_heading (element .parent )
412
412
413
413
return None
@@ -443,7 +443,7 @@ def identify_code_blocks(soup: BeautifulSoup) -> list[dict]:
443
443
processed_tags .add (id (pre ))
444
444
445
445
# Skip if this pre tag is inside a code tag that we'll process later
446
- if pre .parent and pre .parent .name == 'code' and pre .parent in code_tags :
446
+ if pre .parent is not None and pre .parent .name == 'code' and pre .parent in code_tags :
447
447
continue
448
448
449
449
# Find the previous paragraph for context
@@ -467,7 +467,7 @@ def identify_code_blocks(soup: BeautifulSoup) -> list[dict]:
467
467
processed_tags .add (id (code ))
468
468
469
469
# Skip if this code tag is inside a pre tag that we've already processed
470
- if code .parent and code .parent .name == 'pre' and id (code .parent ) in processed_tags :
470
+ if code .parent is not None and code .parent .name == 'pre' and id (code .parent ) in processed_tags :
471
471
continue
472
472
473
473
# Find the previous paragraph for context
@@ -510,7 +510,7 @@ def identify_tables(soup: BeautifulSoup) -> list[dict]:
510
510
if tag .name == 'rh-table' :
511
511
# Look for nested table
512
512
nested_table = tag .find ('table' )
513
- if nested_table :
513
+ if nested_table is not None :
514
514
expanded_tables .append (nested_table )
515
515
else :
516
516
expanded_tables .append (tag )
@@ -534,7 +534,7 @@ def identify_tables(soup: BeautifulSoup) -> list[dict]:
534
534
rows = []
535
535
try :
536
536
# Get rows not in header
537
- if header :
537
+ if header is not None :
538
538
header_rows = set (id (row ) for row in header .find_all ('tr' ))
539
539
all_rows = table .find_all ('tr' , limit = MAX_TABLE_ROWS )
540
540
rows = [row for row in all_rows if id (row ) not in header_rows ]
0 commit comments