Skip to content

Commit 64fbe77

Browse files
committed
See #221 and #267. Custom lxml parser based on the one defined in defusedxml. The parser ignores comments and processing instructions and, by default, has huge_tree, DTDs and access to external documents disabled.
1 parent 60c8cec commit 64fbe77

File tree

10 files changed

+167
-23
lines changed

10 files changed

+167
-23
lines changed

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
},
3333
test_suite='tests',
3434
install_requires=[
35+
'lxml>=3.3.5',
3536
'dm.xmlsec.binding==1.3.7',
3637
'isodate>=0.5.0',
3738
'defusedxml>=0.6.0',

src/onelogin/saml2/auth.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,16 @@
1313

1414
from base64 import b64encode
1515
from urllib import quote_plus
16-
from defusedxml.lxml import tostring
1716

18-
from onelogin.saml2.settings import OneLogin_Saml2_Settings
19-
from onelogin.saml2.response import OneLogin_Saml2_Response
17+
from onelogin.saml2.authn_request import OneLogin_Saml2_Authn_Request
18+
from onelogin.saml2.constants import OneLogin_Saml2_Constants
2019
from onelogin.saml2.errors import OneLogin_Saml2_Error
2120
from onelogin.saml2.logout_response import OneLogin_Saml2_Logout_Response
22-
from onelogin.saml2.constants import OneLogin_Saml2_Constants
23-
from onelogin.saml2.utils import OneLogin_Saml2_Utils, xmlsec
2421
from onelogin.saml2.logout_request import OneLogin_Saml2_Logout_Request
25-
from onelogin.saml2.authn_request import OneLogin_Saml2_Authn_Request
22+
from onelogin.saml2.response import OneLogin_Saml2_Response
23+
from onelogin.saml2.settings import OneLogin_Saml2_Settings
24+
from onelogin.saml2.utils import OneLogin_Saml2_Utils, xmlsec
25+
from onelogin.saml2.xmlparser import tostring
2626

2727

2828
class OneLogin_Saml2_Auth(object):

src/onelogin/saml2/idp_metadata_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
import ssl
1414

1515
from copy import deepcopy
16-
from defusedxml.lxml import fromstring
1716

1817
from onelogin.saml2.constants import OneLogin_Saml2_Constants
1918
from onelogin.saml2.utils import OneLogin_Saml2_Utils
19+
from onelogin.saml2.xmlparser import fromstring
2020

2121

2222
class OneLogin_Saml2_IdPMetadataParser(object):

src/onelogin/saml2/logout_request.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,12 @@
1212
from zlib import decompress
1313
from base64 import b64encode, b64decode
1414
from lxml import etree
15-
from defusedxml.lxml import fromstring
1615
from xml.dom.minidom import Document
1716

1817
from onelogin.saml2.constants import OneLogin_Saml2_Constants
19-
from onelogin.saml2.utils import OneLogin_Saml2_Utils
2018
from onelogin.saml2.errors import OneLogin_Saml2_Error, OneLogin_Saml2_ValidationError
21-
19+
from onelogin.saml2.utils import OneLogin_Saml2_Utils
20+
from onelogin.saml2.xmlparser import fromstring
2221

2322
class OneLogin_Saml2_Logout_Request(object):
2423
"""

src/onelogin/saml2/logout_response.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,13 @@
1010
"""
1111

1212
from base64 import b64encode, b64decode
13-
from defusedxml.lxml import fromstring
14-
1513
from xml.dom.minidom import Document
1614
from defusedxml.minidom import parseString
1715

1816
from onelogin.saml2.constants import OneLogin_Saml2_Constants
19-
from onelogin.saml2.utils import OneLogin_Saml2_Utils
2017
from onelogin.saml2.errors import OneLogin_Saml2_Error, OneLogin_Saml2_ValidationError
18+
from onelogin.saml2.utils import OneLogin_Saml2_Utils
19+
from onelogin.saml2.xmlparser import fromstring
2120

2221

2322
class OneLogin_Saml2_Logout_Response(object):

src/onelogin/saml2/metadata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def add_x509_key_descriptors(metadata, cert=None, add_encryption=True):
247247
if cert is None or cert == '':
248248
return metadata
249249
try:
250-
xml = parseString(metadata.encode('utf-8'), forbid_dtd=True)
250+
xml = parseString(metadata.encode('utf-8'), forbid_dtd=True, forbid_entities=True, forbid_external=True)
251251
except Exception as e:
252252
raise Exception('Error parsing metadata. ' + e.message)
253253

src/onelogin/saml2/response.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@
1111

1212
from base64 import b64decode
1313
from copy import deepcopy
14-
from defusedxml.lxml import tostring, fromstring
1514
from xml.dom.minidom import Document
1615

1716
from onelogin.saml2.constants import OneLogin_Saml2_Constants
18-
from onelogin.saml2.utils import OneLogin_Saml2_Utils, return_false_on_exception
1917
from onelogin.saml2.errors import OneLogin_Saml2_Error, OneLogin_Saml2_ValidationError
18+
from onelogin.saml2.utils import OneLogin_Saml2_Utils, return_false_on_exception
19+
from onelogin.saml2.xmlparser import tostring, fromstring
2020

2121

2222
class OneLogin_Saml2_Response(object):

src/onelogin/saml2/utils.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from hashlib import sha1, sha256, sha384, sha512
1717
from isodate import parse_duration as duration_parser
1818
from lxml import etree
19-
from defusedxml.lxml import tostring, fromstring
2019
from os.path import basename, dirname, join
2120
import re
2221
from sys import stderr
@@ -35,6 +34,7 @@
3534

3635
from onelogin.saml2.constants import OneLogin_Saml2_Constants
3736
from onelogin.saml2.errors import OneLogin_Saml2_Error, OneLogin_Saml2_ValidationError
37+
from onelogin.saml2.xmlparser import tostring, fromstring
3838

3939

4040
if not globals().get('xmlsec_setup', False):
@@ -164,11 +164,12 @@ def validate_xml(xml, schema, debug=False):
164164

165165
return 'invalid_xml'
166166

167-
return parseString(tostring(dom, encoding='unicode').encode('utf-8'), forbid_dtd=True)
167+
return parseString(tostring(dom, encoding='unicode').encode('utf-8'), forbid_dtd=True, forbid_entities=True, forbid_external=True)
168168

169169
@staticmethod
170170
def element_text(node):
171-
etree.strip_tags(node, etree.Comment)
171+
# Double check, the LXML Parser already removes comments
172+
#etree.strip_tags(node, etree.Comment)
172173
return node.text
173174

174175
@staticmethod
@@ -716,7 +717,7 @@ def generate_name_id(value, sp_nq, sp_format=None, cert=None, debug=False, nq=No
716717

717718
edata = enc_ctx.encryptXml(enc_data, elem[0])
718719

719-
newdoc = parseString(tostring(edata, encoding='unicode').encode('utf-8'), forbid_dtd=True)
720+
newdoc = parseString(tostring(edata, encoding='unicode').encode('utf-8'), forbid_dtd=True, forbid_entities=True, forbid_external=True)
720721

721722
if newdoc.hasChildNodes():
722723
child = newdoc.firstChild

src/onelogin/saml2/xmlparser.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# Based on the lxml example from defusedxml
2+
#
3+
# Copyright (c) 2013 by Christian Heimes <[email protected]>
4+
# Licensed to PSF under a Contributor Agreement.
5+
# See https://www.python.org/psf/license for licensing details.
6+
"""lxml.etree protection"""
7+
8+
from __future__ import print_function, absolute_import
9+
10+
import threading
11+
12+
from lxml import etree as _etree
13+
14+
from defusedxml.lxml import DTDForbidden, EntitiesForbidden, NotSupportedError
15+
16+
# True when the installed lxml's major version is 3 or later; check_docinfo
# consults this before trying to inspect entity declarations.
LXML3 = _etree.LXML_VERSION[0] >= 3
17+
18+
# Name of the module this file protects (see the module docstring); it is not
# read anywhere else in this file.
__origin__ = "lxml.etree"
19+
20+
# Serialisation is re-exported from lxml unchanged, so callers can import both
# tostring and fromstring from this module.
tostring = _etree.tostring
21+
22+
23+
class RestrictedElement(_etree.ElementBase):
    """Element class whose traversal methods hide unwanted node kinds.

    Every iteration method below delegates to the lxml implementation and
    then drops any node that is an instance of one of the ``blacklist``
    classes (entities, processing instructions and comments), so code that
    walks the tree only ever sees the remaining nodes.
    """

    __slots__ = ()
    # Node classes that are silently skipped by every iterator below.
    blacklist = (_etree._Entity, _etree._ProcessingInstruction, _etree._Comment)

    def _filter(self, iterator):
        """Yield the items of *iterator* that are not blacklisted nodes."""
        blacklist = self.blacklist
        for child in iterator:
            if isinstance(child, blacklist):
                continue
            yield child

    def __iter__(self):
        """Iterate over the direct children, skipping blacklisted nodes."""
        iterator = super(RestrictedElement, self).__iter__()
        return self._filter(iterator)

    def iterchildren(self, tag=None, reversed=False):
        """Filtered version of lxml's ``iterchildren``."""
        iterator = super(RestrictedElement, self).iterchildren(tag=tag, reversed=reversed)
        return self._filter(iterator)

    def iter(self, tag=None, *tags):
        """Filtered version of lxml's ``iter``; accepts one or more tags."""
        # ``tag`` must be forwarded positionally: ``iter(tag=tag, *tags)``
        # binds the first extra tag to ``tag`` as well and raises TypeError
        # ("multiple values for argument 'tag'") whenever *tags* is non-empty.
        iterator = super(RestrictedElement, self).iter(tag, *tags)
        return self._filter(iterator)

    def iterdescendants(self, tag=None, *tags):
        """Filtered version of lxml's ``iterdescendants``; accepts several tags."""
        # Positional forwarding for the same reason as in ``iter`` above.
        iterator = super(RestrictedElement, self).iterdescendants(tag, *tags)
        return self._filter(iterator)

    def itersiblings(self, tag=None, preceding=False):
        """Filtered version of lxml's ``itersiblings``."""
        iterator = super(RestrictedElement, self).itersiblings(tag=tag, preceding=preceding)
        return self._filter(iterator)

    def getchildren(self):
        """Return the direct children as a list, without blacklisted nodes."""
        iterator = super(RestrictedElement, self).__iter__()
        return list(self._filter(iterator))

    def getiterator(self, tag=None):
        """Filtered version of lxml's ``getiterator``."""
        iterator = super(RestrictedElement, self).getiterator(tag)
        return self._filter(iterator)
64+
65+
66+
class GlobalParserTLS(threading.local):
    """Thread-local holder for the default parser.

    Because this subclasses ``threading.local``, the parser cached on an
    instance is stored separately for each thread; it is built on first
    request from ``parser_config`` and ``element_class``.
    """

    # Keyword arguments handed to lxml's XMLParser for the default parser.
    parser_config = dict(
        resolve_entities=False,
        remove_comments=True,
        no_network=True,
        remove_pis=True,
        huge_tree=False,
    )

    # Element class installed on the default parser; None leaves lxml's own
    # element class in place.
    element_class = RestrictedElement

    def createDefaultParser(self):
        """Build a new XMLParser from ``parser_config``.

        When ``element_class`` is set, it is registered as the parser's
        default element class.
        """
        parser = _etree.XMLParser(**self.parser_config)
        if self.element_class is None:
            return parser
        class_lookup = _etree.ElementDefaultClassLookup(element=self.element_class)
        parser.set_element_class_lookup(class_lookup)
        return parser

    def setDefaultParser(self, parser):
        """Store *parser* as the default parser on this holder."""
        self._default_parser = parser

    def getDefaultParser(self):
        """Return the stored default parser, creating and storing one if absent."""
        cached = getattr(self, "_default_parser", None)
        if cached is not None:
            return cached
        fresh = self.createDefaultParser()
        self.setDefaultParser(fresh)
        return fresh
97+
98+
99+
# Module-wide thread-local holder for the default parser.
_parser_tls = GlobalParserTLS()
100+
# Module-level shortcut to the holder's bound getDefaultParser method.
getDefaultParser = _parser_tls.getDefaultParser
101+
102+
103+
def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
    """Reject a parsed tree that carries a DTD or entity declarations.

    :param elementtree: parsed tree whose ``docinfo`` is inspected
    :param forbid_dtd: raise DTDForbidden when the document has a doctype
    :param forbid_entities: raise EntitiesForbidden when the internal or
        external DTD declares any entity
    :raises NotSupportedError: a doctype is present and entities are
        forbidden, but lxml is older than 3 (no ``iterentities()`` there)
    """
    info = elementtree.docinfo
    has_doctype = bool(info.doctype)

    if has_doctype and forbid_dtd:
        raise DTDForbidden(info.doctype, info.system_url, info.public_id)
    if has_doctype and forbid_entities and not LXML3:
        # lxml 2.x cannot enumerate entity declarations
        raise NotSupportedError("Unable to check for entity declarations in lxml 2.x")

    if not forbid_entities:
        return

    subsets = (info.internalDTD, info.externalDTD)
    for subset in subsets:
        if subset is not None:
            # The first declared entity is enough to reject the document.
            for entity in subset.iterentities():
                raise EntitiesForbidden(entity.name, entity.content, None, None, None, None)
122+
123+
124+
def parse(source, parser=None, base_url=None, forbid_dtd=True, forbid_entities=True):
    """Parse *source* into an element tree and vet its docinfo.

    Uses the default parser from ``getDefaultParser()`` when *parser* is None.
    The parsed tree is passed through ``check_docinfo`` with the given
    *forbid_dtd* / *forbid_entities* flags before it is returned.
    """
    active_parser = getDefaultParser() if parser is None else parser
    tree = _etree.parse(source, active_parser, base_url=base_url)
    check_docinfo(tree, forbid_dtd, forbid_entities)
    return tree
130+
131+
132+
def fromstring(text, parser=None, base_url=None, forbid_dtd=True, forbid_entities=True):
    """Parse *text* and return its root element after vetting the docinfo.

    Uses the default parser from ``getDefaultParser()`` when *parser* is None.
    The tree owning the root is passed through ``check_docinfo`` with the
    given *forbid_dtd* / *forbid_entities* flags before the root is returned.
    """
    active_parser = getDefaultParser() if parser is None else parser
    root = _etree.fromstring(text, active_parser, base_url=base_url)
    check_docinfo(root.getroottree(), forbid_dtd, forbid_entities)
    return root
139+
140+
141+
# Alias: XML(...) behaves exactly like fromstring(...).
XML = fromstring
142+
143+
144+
def iterparse(*args, **kwargs):
    """Always refuse: this module does not offer incremental parsing.

    :raises NotSupportedError: unconditionally, whatever arguments are given
    """
    reason = "iterparse not available"
    raise NotSupportedError(reason)

tests/src/OneLogin/saml2_tests/utils_test.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,16 @@
55

66
from base64 import b64decode
77
import json
8-
from defusedxml.lxml import fromstring
98
from lxml import etree
109
from os.path import dirname, join, exists
1110
import unittest
1211
from xml.dom.minidom import Document, parseString
1312

1413
from onelogin.saml2.constants import OneLogin_Saml2_Constants
14+
from onelogin.saml2.errors import OneLogin_Saml2_Error, OneLogin_Saml2_ValidationError
1515
from onelogin.saml2.settings import OneLogin_Saml2_Settings
1616
from onelogin.saml2.utils import OneLogin_Saml2_Utils
17-
from onelogin.saml2.errors import OneLogin_Saml2_Error, OneLogin_Saml2_ValidationError
17+
from onelogin.saml2.xmlparser import fromstring
1818

1919

2020
class OneLogin_Saml2_Utils_Test(unittest.TestCase):
@@ -1035,7 +1035,6 @@ def testValidateSign(self):
10351035
with self.assertRaisesRegexp(OneLogin_Saml2_ValidationError, "Expected exactly one signature node; got 0."):
10361036
OneLogin_Saml2_Utils.validate_sign(wrapping_attack1, cert, raise_exceptions=True)
10371037

1038-
10391038
if __name__ == '__main__':
10401039
runner = unittest.TextTestRunner()
10411040
unittest.main(testRunner=runner)

0 commit comments

Comments
 (0)