Skip to content

Commit 4cc6318

Browse files
authored
tolerate duplicate ids in xslt xml input (#396)
1 parent 1dc32bc commit 4cc6318

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

sciencebeam/transformers/xslt.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313

1414
def _to_xslt_input(value: Union[bytes, str, T_XSLT_Input]) -> T_XSLT_Input:
    """Return *value* in a form that can be passed to an XSLT transform.

    ``bytes`` and ``str`` values are treated as serialized XML and parsed
    into an element tree. Any other value is returned unchanged.

    The XML is parsed with a recovering parser so that input containing
    duplicate ``xml:id`` values is tolerated instead of being rejected.
    """
    if isinstance(value, (bytes, str)):
        # NOTE(review): recover mode is not specific to duplicate ids;
        # presumably other malformed markup is parsed leniently as well.
        # Confirm that this is acceptable for all callers.
        parser = etree.XMLParser(recover=True)
        return etree.fromstring(value, parser=parser)
    return value
1819

1920

tests/transformers/xslt_test.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from lxml import etree
2+
3+
from sciencebeam.transformers.xslt import _to_xslt_input
4+
5+
6+
class TestToXsltInput:
    """Tests for ``_to_xslt_input``."""

    def test_should_tolerate_duplicate_ids(self):
        """Parse XML in which two elements share the same ``xml:id``."""
        result: etree.ElementBase = _to_xslt_input(
            '''
            <xml>
            <item xml:id="id1">item 1</item>
            <item xml:id="id1">item 2</item>
            </xml>
            '''
        )
        items = result.findall('item')
        # Both items must be present even though their ids collide.
        assert len(items) == 2
        assert [item.text for item in items] == ['item 1', 'item 2']

0 commit comments

Comments
 (0)