From 7dfbd320645be670a4aae095279e966f8ec8d340 Mon Sep 17 00:00:00 2001 From: Angelo Dell'Aera Date: Sat, 15 Feb 2025 17:41:56 +0100 Subject: [PATCH 1/4] Update beautifulsoup4 to v4.13.3 --- pyproject.toml | 2 +- thug/DOM/W3C/Core/Document.py | 2 +- thug/DOM/W3C/Core/Node.py | 2 +- thug/DOM/W3C/HTML/HTMLDocument.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c4e952a1b0..9efb2e325a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ classifiers = [ ] dependencies = [ "appdirs==1.4.4", - "beautifulsoup4==4.12.3", + "beautifulsoup4==4.13.3", "charset-normalizer==3.4.1", "cssutils==2.11.1", "dhash==1.4", diff --git a/thug/DOM/W3C/Core/Document.py b/thug/DOM/W3C/Core/Document.py index 19952e808f..d048b863a5 100644 --- a/thug/DOM/W3C/Core/Document.py +++ b/thug/DOM/W3C/Core/Document.py @@ -224,7 +224,7 @@ def getElementsByTagName(self, tagname): from .NodeList import NodeList if tagname in ("*",): - return NodeList(self.doc, self.doc.find_all(string=False)) + return NodeList(self.doc, self.doc.find_all()) return NodeList(self.doc, self.doc.find_all(tagname.lower())) diff --git a/thug/DOM/W3C/Core/Node.py b/thug/DOM/W3C/Core/Node.py index 9af98a90a1..e61f57f932 100644 --- a/thug/DOM/W3C/Core/Node.py +++ b/thug/DOM/W3C/Core/Node.py @@ -429,7 +429,7 @@ def normalize(self): index += 1 continue - child.tag.string = child.innerText + sibling.innerText + child.tag.string.replace_with(child.innerText + sibling.innerText) self.removeChild(sibling) # Introduced in DOM Level 2 diff --git a/thug/DOM/W3C/HTML/HTMLDocument.py b/thug/DOM/W3C/HTML/HTMLDocument.py index 7a0cce5198..c1777e610e 100644 --- a/thug/DOM/W3C/HTML/HTMLDocument.py +++ b/thug/DOM/W3C/HTML/HTMLDocument.py @@ -481,7 +481,7 @@ def getElementsByName(self, elementName): def _all(self): from .HTMLAllCollection import HTMLAllCollection - s = list(self.doc.find_all(string=False)) + s = list(self.doc.find_all()) return HTMLAllCollection(self.doc, s) @property From 6cd64f274a42661e464914be85c293c498b8cca2 Mon Sep 17 00:00:00 2001 From: Angelo Dell'Aera Date: Sat, 15 Feb 2025 18:23:17 +0100 Subject: [PATCH 2/4] Minor fixes --- thug/DOM/W3C/Core/Node.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/thug/DOM/W3C/Core/Node.py b/thug/DOM/W3C/Core/Node.py index e61f57f932..6a1134bce2 100644 --- a/thug/DOM/W3C/Core/Node.py +++ b/thug/DOM/W3C/Core/Node.py @@ -114,10 +114,10 @@ def setNodeValue(self, value): # pragma: no cover nodeValue = property(getNodeValue, setNodeValue) def getTextContent(self): - return self.tag.string + return str(self.tag.string) def setTextContent(self, value): - self.tag.string = str(value) + self.tag.string.replace_with(str(value)) # Introduced in DOM Level 3 textContent = property(getTextContent, setTextContent) @@ -459,7 +459,7 @@ def cloneNode(self, deep=False): # this method does not copy any text it contains unless it is a # deep clone, since the Text is contained in a child Text node. if cloned.nodeType in (Node.ELEMENT_NODE,) and deep is False: - cloned.tag.string = "" + cloned.tag.string.replace_with("") return cloned From 02ac3cad8ebd90af9df7b09b025c918202bfa045 Mon Sep 17 00:00:00 2001 From: Angelo Dell'Aera Date: Sat, 15 Feb 2025 18:37:17 +0100 Subject: [PATCH 3/4] Minor fixes --- thug/DOM/W3C/Core/Node.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thug/DOM/W3C/Core/Node.py b/thug/DOM/W3C/Core/Node.py index 6a1134bce2..6b47795c92 100644 --- a/thug/DOM/W3C/Core/Node.py +++ b/thug/DOM/W3C/Core/Node.py @@ -117,7 +117,7 @@ def getTextContent(self): return str(self.tag.string) def setTextContent(self, value): - self.tag.string.replace_with(str(value)) + self.tag.string = str(value) # Introduced in DOM Level 3 textContent = property(getTextContent, setTextContent) @@ -459,7 +459,7 @@ def cloneNode(self, deep=False): # this method does not copy any text it contains unless it is a # deep clone, since the Text is contained in a child Text node. if cloned.nodeType in (Node.ELEMENT_NODE,) and deep is False: - cloned.tag.string.replace_with("") + cloned.tag.string = "" return cloned From 059aab01ecc5024bba7cef88974e9b8ad024715d Mon Sep 17 00:00:00 2001 From: Angelo Dell'Aera Date: Mon, 17 Feb 2025 13:18:21 +0100 Subject: [PATCH 4/4] Remove code from coverage --- thug/DOM/DFT.py | 2 +- thug/DOM/W3C/HTML/HTMLBodyElement.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thug/DOM/DFT.py b/thug/DOM/DFT.py index eb2254d9d3..7fad0c58d4 100644 --- a/thug/DOM/DFT.py +++ b/thug/DOM/DFT.py @@ -1574,7 +1574,7 @@ def check_small_element(self, element, tagname): try: value = int(attrs[key].split("px")[0]) - except Exception: # pylint:disable=broad-except + except Exception: # pragma: no cover,pylint:disable=broad-except value = None if not value: diff --git a/thug/DOM/W3C/HTML/HTMLBodyElement.py b/thug/DOM/W3C/HTML/HTMLBodyElement.py index 3c597b7f1f..036a18c70e 100644 --- a/thug/DOM/W3C/HTML/HTMLBodyElement.py +++ b/thug/DOM/W3C/HTML/HTMLBodyElement.py @@ -31,7 +31,7 @@ def getInnerHTML(self): for tag in self.tag.contents: try: html.write(str(tag)) - except Exception as e: # pylint:disable=broad-except + except Exception as e: # pragma: no cover,pylint:disable=broad-except log.warning("[HTMLBodyElement] innerHTML warning: %s", str(e)) return html.getvalue()