Skip to content

Commit cb2569b

Browse files
dracos and TheyWorkForYou Live CVS User
authored and committed
[UK] Fix broken table parsing.
1 parent e038045 commit cb2569b

File tree

2 files changed

+9
-6
lines changed

2 files changed

+9
-6
lines changed

pyscraper/gidmatching.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ def PrepareXMLForDiff(scrapeversion):
4646
assert chk[0] == chk[3] # chunk type (this can fail if due to the lack of two \n's between the two labels, and thus detects an empty speech, which should not be there.
4747
# new_chk = chk[2]
4848
new_chk = re.sub(
49-
'(?s)(<p\s[^>]*>)(.*?)(<\/p>)',
50-
lambda m: (u''.join((m.group(1), re.sub('\n', ' ', m.group(2)), m.group(3)))),
49+
r'(?s)(<(p|tr)\s[^>]*>)(.*?)(<\/\2>)',
50+
lambda m: (u''.join((m.group(1), re.sub('\n', ' ', m.group(3)), m.group(4)))),
5151
chk[2]
5252
)
5353
essxindx.append(len(essxlist))

pyscraper/new_hansard.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -763,20 +763,23 @@ def parse_votelist(self, votes, direction, vote_list, is_teller=False):
763763
return vote_list
764764

765765
def parse_table(self, wrapper):
766-
rows = wrapper.xpath('.//row')
766+
rows = wrapper.xpath('.//ns:row', namespaces=self.ns_map)
767767
tag = etree.Element('table')
768768
body = etree.Element('tbody')
769769
url = None
770770
for row in rows:
771771
row_tag = etree.Element('tr')
772772
row_tag.set('pid', self.get_pid())
773773

774-
for entry in row.xpath('(.//hs_brev|.//hs_Para)'):
774+
for entry in row.xpath('(.//ns:hs_brev|.//ns:hs_Para)', namespaces=self.ns_map):
775775
if url is None:
776776
url = entry.get('url')
777-
row_tag.append(list(entry))
777+
td_tag = etree.Element('td')
778+
td_tag.text = self.get_single_line_text_from_element(entry)
779+
row_tag.append(td_tag)
778780

779-
body.append(row_tag)
781+
if len(row_tag):
782+
body.append(row_tag)
780783

781784
tag.append(body)
782785

0 commit comments

Comments
 (0)