Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions wiktionaryparser/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,23 +148,27 @@ def parse_pronunciations(self, word_contents):
pronunciation_text = []
span_tag = self.soup.find_all('span', {'id': pronunciation_id})[0]
list_tag = span_tag.parent

list_elements = []
while list_tag.name != 'ul':
list_tag = list_tag.find_next_sibling()
if list_tag.name == 'p':
pronunciation_text.append(list_tag.text)
break
if list_tag.name == 'div' and any(_ in pronunciation_div_classes for _ in list_tag['class']):
break
for super_tag in list_tag.find_all('sup'):
super_tag.clear()
for list_element in list_tag.find_all('li'):
if list_tag.name == 'p': break
list_elements += list_tag.find_all('li')

for list_element in list_elements:
for super_tag in list_element.find_all('sup'):
super_tag.clear()

for audio_tag in list_element.find_all('div', {'class': 'mediaContainer'}):
audio_links.append(audio_tag.find('source')['src'])
audio_tag.extract()

for nested_list_element in list_element.find_all('ul'):
nested_list_element.extract()

if list_element.text and not list_element.find('table', {'class': 'audiotable'}):
pronunciation_text.append(list_element.text.strip())

pronunciation_list.append((pronunciation_index, pronunciation_text, audio_links))
return pronunciation_list

Expand Down