Add the AdvancedHTMLParser.AdvancedHTMLParser.setDoctype method, which lets you set the doctype explicitly (instead of having to assign parser.doctype = "DOCTYPE html" directly). When you parse a page that contains a doctype, it is still detected and used in the same way as before. You can also pass None to this method to clear the doctype, in which case AdvancedHTMLParser.getHTML will not output a doctype tag.

kata198 · kata198 · commit 135b1d707026 · 2019-06-21T22:16:36.000-04:00
diff --git a/AdvancedHTMLParser/Parser.py b/AdvancedHTMLParser/Parser.py
@@ -287,6 +287,22 @@ def setRoot(self, root):
         '''
         self.root = root
 
+
+    def setDoctype(self, newDoctype):
+        '''
+            setDoctype - Set the doctype for this document, or clear it.
+
+                @param newDoctype <str/None> - The new value to store as this parser's doctype
+
+                    If None, will clear the doctype, so that #getHTML does not output a doctype tag
+
+                    Otherwise, a string of the doctype tag contents (without the surrounding "<!" and ">", as in the example below).
+
+                      For example, the HTML5 doctype would be "DOCTYPE html"
+        '''
+        self.doctype = newDoctype
+
+
     def getElementsByTagName(self, tagName, root='root'):
         '''
             getElementsByTagName - Searches and returns all elements with a specific tag name.
@@ -1120,6 +1136,7 @@ def setRoot(self, root):
 #                 Public
 ##########################################################
 
+
     # This should be called if you modify a parsed tree at an element level, then search it.
     def reindex(self, newIndexIDs=None, newIndexNames=None, newIndexClassNames=None, newIndexTagNames=None):
         '''