Skip to content

Commit 710ec3e

Browse files
committed
Add getFirstElementCustomFilter to AdvancedHTMLParser and AdvancedTag. They find the FIRST item matching given criteria.
1 parent e02cf05 commit 710ec3e

File tree

3 files changed

+73
-0
lines changed

3 files changed

+73
-0
lines changed

AdvancedHTMLParser/Parser.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,33 @@ def getElementsCustomFilter(self, filterFunc, root='root'):
392392
elements += self.getElementsCustomFilter(filterFunc, child)
393393
return TagCollection(elements)
394394

395+
396+
def getFirstElementCustomFilter(self, filterFunc, root='root'):
    '''
        getFirstElementCustomFilter - Scan elements using a provided function, stop and return the first match.

            Children are scanned depth-first: each child is tested, then its descendants, before moving to the next child.

            @see getElementsCustomFilter to match multiple elements

        @param filterFunc <function>(node) - A function that takes an AdvancedTag as an argument, and returns True if some arbitrary criteria is met.
            The result is compared with "is True", so a merely truthy return value (e.g. 1) does not count as a match.

        @param root - Where to begin the search, resolved through self._handleRootArg. Defaults to 'root'
            (presumably the parser's document root -- confirm against _handleRootArg).

        @return - An AdvancedTag of the node that matched, or None if no match.
    '''
    (root, isFromRoot) = self._handleRootArg(root)

    # The starting node itself is only a candidate when _handleRootArg flags it as
    #  the root; on recursive calls the child was already tested by the caller.
    if isFromRoot is True and filterFunc(root) is True:
        return root

    for child in root.children:
        if filterFunc(child) is True:
            return child

        subRet = self.getFirstElementCustomFilter(filterFunc, child)
        # Test against None, not truthiness, so a matched node which happens to
        #  evaluate as falsy is still returned (consistent with AdvancedTag's version).
        if subRet is not None:
            return subRet

    return None
421+
395422
def contains(self, em):
396423
'''
397424
Checks if #em is found anywhere within this element tree

AdvancedHTMLParser/Tags.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -939,6 +939,8 @@ def getElementsCustomFilter(self, filterFunc):
939939
@param filterFunc <function> - A function or lambda expression that should return "True" if the passed node matches criteria.
940940
941941
@return - TagCollection of matching results
942+
943+
@see getFirstElementCustomFilter
942944
'''
943945
elements = []
944946

@@ -949,6 +951,29 @@ def getElementsCustomFilter(self, filterFunc):
949951

950952
return TagCollection(elements)
951953

954+
def getFirstElementCustomFilter(self, filterFunc):
    '''
        getFirstElementCustomFilter - Find the first descendant of this node accepted by a filter function.

            The walk is depth-first: a child is tested, then everything beneath it, and only then the next child.
            This node ("self") is never tested.

        @param filterFunc <function> - A function or lambda expression called with each candidate node. A node matches
            only when the call returns exactly True (the result is compared with "is True").

        @return <AdvancedTag/None> - The first matching node, or None when nothing under this node matches

        @see getElementsCustomFilter
    '''
    for node in self.children:
        # Take the child itself when it matches, otherwise whatever its subtree yields
        if filterFunc(node) is True:
            found = node
        else:
            found = node.getFirstElementCustomFilter(filterFunc)

        if found is not None:
            return found

    return None
976+
952977
def getPeersByAttr(self, attrName, attrValue):
953978
'''
954979
getPeersByAttr - Gets peers (elements on same level) which match an attribute/value combination.

tests/AdvancedHTMLParserTests/test_CustomFilter.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,27 @@ def searchFunc(node):
126126
assert len(results) == 1
127127
assert results[0].id == 'item4'
128128

129+
def test_oneElementCustomFilter(self):
    # Checks getFirstElementCustomFilter at both entry points: the parser
    #  and an individual tag (the body node). Both must yield id="item4".
    htmlParser = self.parser

    def hasClassThree(tag):
        # Coerce to a real bool; the result is asserted on below via the search methods
        return bool(tag.hasClass('three'))

    # Search through the parser
    parserMatch = htmlParser.getFirstElementCustomFilter(hasClassThree)

    assert parserMatch , 'Expected to get a node with class="three" on AdvancedHTMLParser.getFirstElementCustomFilter'
    assert parserMatch.id == 'item4' , 'Expected to get id="item4"'

    # Search again, this time starting from the body tag
    bodyTags = htmlParser.getElementsByTagName('body')
    body = bodyTags[0]
    assert body , 'Failed to find body node'

    tagMatch = body.getFirstElementCustomFilter(hasClassThree)

    assert tagMatch , 'Expected to get a node with class="three" on AdvancedTag.getFirstElementCustomFilter'
    assert tagMatch.id == 'item4' , 'Expected to get id="item4"'
149+
129150

130151

131152
if __name__ == '__main__':

0 commit comments

Comments
 (0)