Skip to content

Commit 37da3a4

Browse files
committed
xpath - Implement "contains" function
1 parent 2e12984 commit 37da3a4

File tree

2 files changed

+104
-2
lines changed

2 files changed

+104
-2
lines changed

AdvancedHTMLParser/xpath/_body.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1201,6 +1201,71 @@ def resolveValueFromTag(self, thisTag):
12011201
VALUE_GENERATOR_RES.append( (BEVG_FUNCTION_CONCAT_RE, BodyElementValueGenerator_Function_Concat) )
12021202

12031203

1204+
class BodyElementValueGenerator_Function_Contains(BodyElementValueGenerator_Function):
    '''
        BodyElementValueGenerator_Function_Contains - Value generator implementing the xpath "contains" function,

          which reports whether its first (string) argument contains its second (string) argument.
    '''

    # FUNCTION_MIN_ARGS - Class attribute, the fewest args allowed before a parsing error is raised
    FUNCTION_MIN_ARGS = 2

    # FUNCTION_NAME_STR - Class attribute, the name of this function
    FUNCTION_NAME_STR = 'contains'


    def __init__(self, fnArgElements=None):
        '''
            __init__ - Create this object

                @param fnArgElements default None - The argument elements for this function call.

                    Passed unmodified to BodyElementValueGenerator_Function.__init__ ; the arguments are

                    then read back from self.fnArgElements


                @raises XPathParseError - If the number of arguments is anything other than two
        '''
        BodyElementValueGenerator_Function.__init__(self, fnArgElements)

        # Validate against what the base class stored on this object, not the raw parameter
        givenArgs = self.fnArgElements
        if len(givenArgs) != 2:
            raise XPathParseError(
                '"contains" function takes exactly two arguments, but got %d. Args were: %s' % (
                    len(givenArgs),
                    repr(givenArgs),
                )
            )

        # string1 is the text searched within, string2 is the text searched for
        self.string1Arg = givenArgs[0]
        self.string2Arg = givenArgs[1]


    def resolveValueFromTag(self, thisTag):
        '''
            resolveValueFromTag - Evaluate both arguments against a tag, and report whether the

              string form of the first contains the string form of the second


                @param thisTag <AdvancedTag> - The tag of interest


                @return <BodyElementValue_Boolean> - True if string1 contains string2, otherwise False


                @raises XPathRuntimeError - If either argument's value cannot be converted to a string


                @see BodyElementValueGenerator_Function.resolveValueFromTag
        '''
        # Both arguments are evaluated up front, before either is converted
        haystackElement = self.string1Arg.evaluateLevelForTag(thisTag)
        needleElement = self.string2Arg.evaluateLevelForTag(thisTag)

        try:
            haystack = str( haystackElement.getValue() )
        except Exception:
            raise XPathRuntimeError('Error in contains() - cannot convert first argument to a string! It is %s' %( repr(haystackElement.getValue()), ))

        try:
            needle = str( needleElement.getValue() )
        except Exception:
            raise XPathRuntimeError('Error in contains() - cannot convert second argument to a string! It is %s' %( repr(needleElement.getValue()), ))

        # "in" between two str objects already yields a bool
        return BodyElementValue_Boolean( needle in haystack )
1264+
1265+
1266+
# BEVG_FUNCTION_CONTAINS_RE - Matches the start of a "contains(" call (function name in any letter case,
#   optional spaces/tabs around the name and after the paren). Everything after the
#   open paren is captured in the "restOfBody" group.
BEVG_FUNCTION_CONTAINS_RE = re.compile(
    r'''^([ \t]*[cC][oO][nN][tT][aA][iI][nN][sS][ \t]*[\(][ \t]*(?P<restOfBody>.+))$'''
)

# Register the ( pattern, generator class ) pair alongside the other value generators
VALUE_GENERATOR_RES.append(
    ( BEVG_FUNCTION_CONTAINS_RE, BodyElementValueGenerator_Function_Contains )
)
1268+
12041269
#############################
12051270
## Operations ##
12061271
#############################

tests/AdvancedHTMLParserTests/test_XPath.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -393,9 +393,9 @@ def test_xpathLast(self):
393393
assert lastSpan in results , 'Got a mismatch of results from xpath vs non-xpath. Node ( %s ) was found via non-xpath, but not in the xpath set!' %(repr(lastSpan), )
394394

395395

396-
def test_parseOptimizations1(self):
396+
def test_xpathParseOptimizations1(self):
397397
'''
398-
test_parseOptimizations1 - Test that we properly optimize xpath strings with values that can be calculated at parse time
398+
test_xpathParseOptimizations1 - Test that we properly optimize xpath strings with values that can be calculated at parse time
399399
'''
400400

401401
bodyElements = parseBodyStringIntoBodyElements('''"hello" || " " || "world" = "hello world"''')
@@ -410,6 +410,43 @@ def test_parseOptimizations1(self):
410410
assert value is True , 'Expected the calculated BodyElementValue to be <bool> True. Got: <%s> %s' %( type(value).__name__, repr(value))
411411

412412

413+
414+
def test_xpathContains(self):
    '''
        test_xpathContains - Test the "contains" function, both directly against text() and

          combined with normalize-space() and an ancestor axis step
    '''
    parser = self.parser

    # Case 1 - contains() against text() should match exactly one item name span
    puddingNameSpans = parser.getElementsByXPathExpression('//span[ @name = "itemName" and contains( text(), "Pudding" ) ]')

    assert len(puddingNameSpans) == 1 , 'Expected to get one span[name="itemName"] where inner text contains "Pudding", but got %d. %s' %( len(puddingNameSpans), repr(puddingNameSpans) )

    puddingSpan = puddingNameSpans[0]
    assert 'Pudding Cups' in puddingSpan.innerText , 'Expected "Pudding Cups" to be in the inner text of the matched pudding item, but it was not. Inner text was: %s' %( repr(puddingSpan.innerText), )

    # Case 2 - contains() against normalize-space(), then walk up to the owning "items" div
    itemsContainingLetterE = parser.getElementsByXPathExpression('''//span[(@name = "itemName") and contains( normalize-space(), "e" )]/ancestor::div[@name="items"]''')
    assert len(itemsContainingLetterE) == 3 , 'Expected to find 3 items which contained lower case "e" , but found %d ! %s' %( len(itemsContainingLetterE), repr(itemsContainingLetterE) )

    # Gather the ids of every matched div, then check that each expected id is present
    matchedIds = [ itemEm.id for itemEm in itemsContainingLetterE ]

    assert 'item1' in matchedIds , 'Expected to find div id="item1" but did not!'
    assert 'item2' in matchedIds , 'Expected to find div id="item2" but did not!'
    assert 'item3' in matchedIds , 'Expected to find div id="item3" but did not!'
447+
448+
449+
413450
if __name__ == '__main__':
    # When run directly, hand this file (plus any extra command-line args) to GoodTests.py
    #  and exit with the status that process returns.

    # Wrap each extra argument in double quotes, backslash-escaping any embedded double quote
    quotedArgs = [ '"%s"' %(arg.replace('"', '\\"'), ) for arg in sys.argv[1:] ]

    goodTestsCmd = 'GoodTests.py -n1 "%s" %s' %(sys.argv[0], ' '.join(quotedArgs) )

    # The command is a single string run through the shell
    goodTestsProcess = subprocess.Popen(goodTestsCmd, shell=True)
    sys.exit(goodTestsProcess.wait())
415452

0 commit comments

Comments
 (0)