Skip to content

Commit ecff37f

Browse files
committed
xpath - Allow normalize-space to take an argument
1 parent 1fcdea0 commit ecff37f

File tree

4 files changed

+78
-22
lines changed

4 files changed

+78
-22
lines changed

AdvancedHTMLParser/xpath/_axes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
def _mkRegexStrAllAxesPossibilities():
4747
'''
4848
_mkRegexStrAllAxesPossibilities - Make a regular expression string to match entire entities in our supported list
49-
49+
5050
of axes, case insensitively.
5151
5252

AdvancedHTMLParser/xpath/_body.py

Lines changed: 75 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -875,25 +875,6 @@ def __repr__(self):
875875
VALUE_GENERATOR_RES.append( (BEVG_FETCH_ATTRIBUTE_RE, BodyElementValueGenerator_FetchAttribute) )
876876

877877

878-
class BodyElementValueGenerator_NormalizeSpace(BodyElementValueGenerator):
879-
'''
880-
BodyElementValueGenerator_NormalizeSpace - Implement the 'normalize-space()' function
881-
'''
882-
883-
def __init__(self, functionInner=None):
884-
885-
BodyElementValueGenerator.__init__(self)
886-
887-
888-
def resolveValueFromTag(self, thisTag):
889-
890-
return BodyElementValue_String( thisTag.innerText.strip() )
891-
892-
893-
BEVG_NORMALIZE_SPACE_RE = re.compile(r'^([ \t]*[nN][oO][rR][mM][aA][lL][iI][zZ][eE][\-][sS][pP][aA][cC][eE][ \t]*[\(][ \t]*[\)][ \t]*)')
894-
VALUE_GENERATOR_RES.append( (BEVG_NORMALIZE_SPACE_RE, BodyElementValueGenerator_NormalizeSpace) )
895-
896-
897878
class BodyElementValueGenerator_Text(BodyElementValueGenerator):
898879
'''
899880
BodyElementValueGenerator_Text - Implement the 'text()' function
@@ -1266,6 +1247,81 @@ def resolveValueFromTag(self, thisTag):
12661247
BEVG_FUNCTION_CONTAINS_RE = re.compile(r'''^([ \t]*[cC][oO][nN][tT][aA][iI][nN][sS][ \t]*[\(][ \t]*(?P<restOfBody>.+))$''')
12671248
VALUE_GENERATOR_RES.append( (BEVG_FUNCTION_CONTAINS_RE, BodyElementValueGenerator_Function_Contains) )
12681249

1250+
1251+
class BodyElementValueGenerator_Function_NormalizeSpace(BodyElementValueGenerator_Function):
1252+
'''
1253+
BodyElementValueGenerator_Function_NormalizeSpace - Implement the 'normalize-space()' function
1254+
'''
1255+
1256+
# FUNCTION_MIN_ARGS - Class attribute for the minimum number of args lest there be a parsing error
1257+
FUNCTION_MIN_ARGS = 0
1258+
1259+
# FUNCTION_NAME_STR - Name of the function
1260+
FUNCTION_NAME_STR = 'normalize-space'
1261+
1262+
1263+
def __init__(self, fnArgElements=None):
1264+
'''
1265+
__init__ - Create this object
1266+
'''
1267+
BodyElementValueGenerator_Function.__init__(self, fnArgElements)
1268+
1269+
# Ensure we are given at most one argument (zero or one supported)
1270+
fnArgElements = self.fnArgElements
1271+
numArguments = len(fnArgElements)
1272+
1273+
if numArguments > 1:
1274+
raise XPathParseError('normalize-space function called with too many arguments (0 or 1 supported)')
1275+
1276+
if numArguments == 1:
1277+
self.getString = lambda _thisTag : self._getStringFromArgumentAndTag(0, _thisTag)
1278+
else:
1279+
self.getString = lambda _thisTag : _thisTag.innerText
1280+
1281+
1282+
1283+
def _getStringFromArgumentAndTag(self, argumentNum, thisTag):
1284+
'''
1285+
_getStringFromArgumentAndTag - Get the string for the given argument and tag
1286+
1287+
@param argumentNum <int> - The argument index
1288+
1289+
@param thisTag <AdvancedTag> - The tag of reference
1290+
1291+
1292+
@return <str> - The string held by that value
1293+
'''
1294+
valueEm = self.fnArgElements[0].evaluateLevelForTag(thisTag)
1295+
1296+
if not issubclass(valueEm.__class__, (BodyElementValue_String, BodyElementValue_Null) ):
1297+
raise XPathRuntimeError('Got a value returned from within argument to normalize-text which was not string! It was: %s' %( valueEm.VALUE_TYPE, ))
1298+
1299+
value = str(valueEm.getValue())
1300+
return value
1301+
1302+
def resolveValueFromTag(self, thisTag):
1303+
'''
1304+
resolveValueFromTag - Get the string for this tag (the argument's value if one was given, otherwise the tag's innerText) and return it with leading and trailing whitespace stripped
1305+
1306+
1307+
@param thisTag <AdvancedTag> - The tag of interest
1308+
1309+
1310+
@return <BodyElementValue_String> - The resolved string with leading and trailing whitespace stripped
1311+
1312+
1313+
@see BodyElementValueGenerator_Function.resolveValueFromTag
1314+
'''
1315+
1316+
stringValue = self.getString(thisTag)
1317+
return BodyElementValue_String(stringValue.strip())
1318+
1319+
1320+
BEVG_FUNCTION_NORMALIZE_SPACE_RE = re.compile(r'''^([ \t]*[nN][oO][rR][mM][aA][lL][iI][zZ][eE][\-][sS][pP][aA][cC][eE][ \t]*[\(][ \t]*(?P<restOfBody>.+))$''')
1321+
VALUE_GENERATOR_RES.append( (BEVG_FUNCTION_NORMALIZE_SPACE_RE, BodyElementValueGenerator_Function_NormalizeSpace) )
1322+
1323+
1324+
12691325
#############################
12701326
## Operations ##
12711327
#############################

AdvancedHTMLParser/xpath/_debug.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def setXPathDebug(newValue):
3737
def getXPathDebug():
3838
'''
3939
getXPathDebug - Get whether we should print debug messages.
40-
40+
4141
Each function call with DEBUG output should fetch a fresh copy of this.
4242
'''
4343
global _XPATH_DEBUG

tests/AdvancedHTMLParserTests/test_XPath.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ def test_xpathBooleanAnd(self):
246246
'''
247247
test_xpathBooleanAnd - Test the "and" boolean operator
248248
'''
249-
itemsThatAreTurtles = self.parser.getElementsByXPathExpression('''//*[ @name = "itemName" and normalize-space() = "Turtles"]/parent::div''')
249+
itemsThatAreTurtles = self.parser.getElementsByXPathExpression('''//*[ normalize-space(@name) = "itemName" and normalize-space() = "Turtles"]/parent::div''')
250250

251251
assert len(itemsThatAreTurtles) == 1 , 'Expected to find one turtle item, but got: %s' %(repr(itemsThatAreTurtles), )
252252

0 commit comments

Comments
 (0)