Skip to content

Commit c3f38da

Browse files
committed
Add attribute name validation to the validator
1 parent 5dc7583 commit c3f38da

File tree

2 files changed

+51
-4
lines changed

2 files changed

+51
-4
lines changed

AdvancedHTMLParser/Validator.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
# Copyright (c) 2015, 2019 Tim Savannah All Rights Reserved under LGPLv3. See LICENSE (https://gnu.org/licenses/lgpl-3.0.txt) for more information.
33

44
from .Parser import AdvancedHTMLParser
5+
from .Tags import isValidAttributeName
56

6-
from .exceptions import InvalidCloseException, MissedCloseException
7+
from .exceptions import InvalidCloseException, MissedCloseException, InvalidAttributeNameException
78

8-
__all__ = ('InvalidCloseException', 'MissedCloseException', 'ValidatingAdvancedHTMLParser')
9+
__all__ = ('InvalidCloseException', 'MissedCloseException', 'InvalidAttributeNameException',
10+
'ValidatingAdvancedHTMLParser',
11+
)
912

1013
class ValidatingAdvancedHTMLParser(AdvancedHTMLParser):
1114
'''
@@ -16,6 +19,27 @@ class ValidatingAdvancedHTMLParser(AdvancedHTMLParser):
1619
exceptions.MissedCloseException - The parsed string/file missed closing an item.
1720
'''
1821

22+
def handle_starttag(self, tagName, attributeList, isSelfClosing=False):
    '''
        handle_starttag - Internal parsing hook, called with each opening tag.

          Every attribute name on the tag is checked with isValidAttributeName
          before the tag is handed to AdvancedHTMLParser.handle_starttag, so an
          invalid name stops parsing with an error instead of being passed along.


          @param tagName - Name of the tag being opened

          @param attributeList - Iterable of (attribute name, attribute value) pairs

          @param isSelfClosing <bool> default False - Forwarded unchanged to the parent implementation


          @return - Whatever AdvancedHTMLParser.handle_starttag returns

          @raises - InvalidAttributeNameException if an attribute name is passed with invalid character(s)
    '''

    # Validate every attribute name before the parent sees the tag.
    for (attrName, attrValue) in attributeList:

        # Truthiness test rather than "is False": any falsy result from the
        #  checker (False, None, ...) is treated as "invalid", so a checker that
        #  does not return a strict bool cannot let a bad name slip through.
        if not isValidAttributeName(attrName):

            raise InvalidAttributeNameException(tagName, attrName, attrValue)

    # Done validating, feed to parent.
    return AdvancedHTMLParser.handle_starttag(self, tagName, attributeList, isSelfClosing)
42+
1943

2044
def handle_endtag(self, tagName):
2145
'''

AdvancedHTMLParser/exceptions.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class HTMLValidationException(Exception):
3232

3333
class InvalidCloseException(HTMLValidationException):
3434
'''
35-
InvalidCloseException - Raised when a tag is closed that shouldn't be closed.
35+
InvalidCloseException - Raised when a tag is closed that shouldn't be closed in validating parser
3636
'''
3737

3838
def __init__(self, triedToClose, stillOpen):
@@ -48,7 +48,7 @@ def __init__(self, triedToClose, stillOpen):
4848

4949
class MissedCloseException(HTMLValidationException):
5050
'''
51-
MissedCloseException - Raised when a close was missed
51+
MissedCloseException - Raised when a close was missed in validating parser
5252
'''
5353

5454
def __init__(self, triedToClose, stillOpen):
@@ -60,6 +60,29 @@ def __init__(self, triedToClose, stillOpen):
6060
Exception.__init__(self, message)
6161

6262

63+
class InvalidAttributeNameException(HTMLValidationException):
    '''
        InvalidAttributeNameException - Validation error describing a tag that carries an attribute with an invalid name
    '''

    def __init__(self, tagName, badAttributeName, badAttributeValue):
        '''
            __init__ - Build the exception and its human-readable message

              @param tagName <str> - Name of the tag holding the attribute

              @param badAttributeName <str> - The attribute name that was rejected

              @param badAttributeValue <str> - The value that accompanied that attribute name
        '''

        # repr() keeps quotes and escape sequences visible in the final message.
        shownName = repr(badAttributeName)
        shownValue = repr(badAttributeValue)

        template = 'Parsed a tag %s which contains an invalid attribute, %s = %s . ( Maybe characters outside quotes in tag? )'

        Exception.__init__(self, template % (tagName, shownName, shownValue))
84+
85+
6386
class IndexSizeErrorException(ValueError):
6487

6588
def __init__(self, *args, **kwargs):

0 commit comments

Comments
 (0)