@@ -135,22 +135,66 @@ def apply_datatype(chapter, ch_name):
135
135
return chapter
136
136
137
137
138
def get_top_level_sections(soup):
    """
    Helper utility to grab top-level sections in main <article>.

    Args:
        soup: the parsed chapter document; it must expose ``find_all`` and
            ``find`` the way a BeautifulSoup object does.

    Returns:
        A list of sections. In the normal case (exactly one
        ``<article role="main">``) these are the ``<section>`` elements
        that are direct children of that article, minus the one whose id
        is "bibliography". If no such article exists, every ``<section>``
        in the document is returned unfiltered. If more than one exists,
        a warning is printed and an empty list is returned.
    """
    section_wrappers = soup.find_all("article", attrs={"role": "main"})

    # test case for partial files, not expected in production
    if not section_wrappers:
        return soup.find_all('section')

    if len(section_wrappers) > 1:
        # Prefer the heading of the first main article; fall back to the
        # first heading anywhere in the document.
        heading = section_wrappers[0].find('h1')
        if heading is None:
            heading = soup.find('h1')
        main_title = heading.get_text() if heading is not None else None
        print("Warning: " +
              f"The chapter with title '{main_title}' is malformed.")
        # Every other path returns a list and callers index/iterate the
        # result, so signal "nothing usable" with an empty list rather
        # than a (None, None) pair.
        return []

    main = section_wrappers[0]
    # Only direct children count as top-level; nested sections are left
    # inside their parents. The name check runs first so that children
    # that are not <section> elements are never asked for an id.
    return [
        element for element in main.children
        if element.name == "section"
        and element.get('id') != "bibliography"
    ]
169
+
170
+
138
171
def get_main_section(soup):
    """
    Gets the main "section," or the main chapter text, and additionally
    checks to see if there is a separate bibliography section, returning
    that if it exists to be dealt with later.

    Args:
        soup: the parsed chapter document.

    Returns:
        A ``(main, bibliography)`` pair. ``main`` is the first top-level
        section, or None when there is none. ``bibliography`` is the
        ``<section id="bibliography">`` element, or None when the
        document has no such section.
    """
    # Drop any None placeholders so a malformed chapter is treated as
    # having no sections instead of triggering the "extra sections"
    # warning below.
    sections = [
        section for section in get_top_level_sections(soup)
        if section is not None
    ]
    main = sections[0] if sections else None

    if len(sections) > 1:
        # Only the first section is returned; tell the author the rest
        # are being ignored.
        article = soup.find('article', attrs={"role": "main"})
        heading = article.find('h1') if article is not None else None
        if heading is None:
            heading = soup.find('h1')
        # Use the heading text, not the element, in the message.
        main_title = heading.get_text() if heading is not None else None
        err_msg = (
            f"The chapter with title '{main_title}' "
            "has extra <section>s "
            "that will not be processed. Please check the "
            "notebook source files."
        )
        logging.warning(err_msg)
        print(err_msg)

    bibliography = soup.find('section', id="bibliography")

    return main, bibliography
155
199
156
200
@@ -172,11 +216,14 @@ def process_chapter_soup(
172
216
173
217
# perform initial swapping and namespace designation
174
218
chapter , bib = get_main_section (base_soup )
219
+ if bib and not chapter : # bibs can be their own chapters
220
+ chapter = bib
221
+ bib = None
175
222
176
223
if not chapter : # guard against malformed files
177
224
logging .warning (f"Failed to process { toc_element } ." )
178
225
raise RuntimeError (
179
- f"Failed to process { toc_element } . Please check for error in " +
226
+ f"Failed to process { toc_element } . Please check for errors in " +
180
227
"your source file(s). Contact the Tools team for additional " +
181
228
"support." )
182
229
@@ -189,8 +236,10 @@ def process_chapter_soup(
189
236
190
237
if chapter_parts :
191
238
for subfile in chapter_parts :
192
- subsection , sub_bib = process_chapter_subparts (subfile )
193
- chapter .append (subsection )
239
+ subsections , sub_bib = process_chapter_subparts (subfile )
240
+ if subsections :
241
+ for subsection in subsections :
242
+ chapter .append (subsection )
194
243
if bib and sub_bib :
195
244
entries = sub_bib .find_all ("dd" ) # type: ignore
196
245
bib .dl .extend (entries ) # type: ignore
def process_chapter_subparts(subfile):
    """
    Processing for chapters with "sections".

    Args:
        subfile: path of the HTML sub-file to open and parse.

    Returns:
        A ``(sections, bibliography)`` pair. ``sections`` is the list of
        top-level sections of the sub-file, each retagged with
        ``data-type="sect1"``. ``bibliography`` is the sub-file's
        ``<section id="bibliography">`` element, or None if absent.
    """
    with open(subfile, 'r') as f:
        soup = BeautifulSoup(f, 'lxml')

    # Skip None placeholders so a malformed sub-file yields an empty list
    # instead of a TypeError on the item assignments below.
    top_level_sections = [
        section for section in get_top_level_sections(soup)
        if section is not None
    ]

    for section in top_level_sections:
        section['data-type'] = 'sect1'  # type: ignore
        del section['class']  # type: ignore
        # move id from empty span to section
        try:
            section['id'] = section.select_one(  # type: ignore
                'span')['id']
        except (TypeError, KeyError):
            # TypeError: the section has no <span> at all (seen when
            # there's no numbering on the toc). KeyError: a <span> exists
            # but carries no id (seen when there is numbering on the toc).
            # Either way, if it's not there that's OK.
            pass

    bibliography = soup.find('section', id="bibliography")

    return top_level_sections, bibliography
227
281
228
282
229
283
def process_chapter (toc_element ,
0 commit comments