@@ -124,7 +124,7 @@ cdef class HTMLParser:
124124 status = myhtml_parse(self .html_tree, self ._encoding, html, html_len)
125125
126126 if status != 0 :
127- raise RuntimeError (" Can't parse HTML: \n %s " % str (html) )
127+ raise RuntimeError (" Can't parse HTML (status code: %d ) " % status )
128128
129129 assert self .html_tree.node_html != NULL
130130
@@ -147,9 +147,13 @@ cdef class HTMLParser:
147147 def root (self ):
148148 """ Returns root node."""
149149 if self .html_tree and self .html_tree.node_html:
150- node = Node()
151- node._init(self .html_tree.node_html, self )
152- return node
150+ try :
151+ node = Node()
152+ node._init(self .html_tree.node_html, self )
153+ return node
154+ except Exception :
155+ # If Node creation or initialization fails, return None
156+ return None
153157 return None
154158
155159 @property
@@ -185,6 +189,12 @@ cdef class HTMLParser:
185189 name : str (e.g. div)
186190
187191 """
192+ # Validate tag name
193+ if not name:
194+ raise ValueError (" Tag name cannot be empty" )
195+ if len (name) > 100 : # Reasonable limit for tag names
196+ raise ValueError (" Tag name is too long" )
197+
188198 cdef myhtml_collection_t* collection = NULL
189199 pybyte_name = name.encode(' UTF-8' )
190200 cdef mystatus_t status = 0 ;
@@ -428,6 +438,7 @@ cdef class HTMLParser:
428438 if self .html_tree != NULL :
429439 myhtml = self .html_tree.myhtml
430440 myhtml_tree_destroy(self .html_tree)
441+ self .html_tree = NULL # Prevent double-free
431442 if myhtml != NULL :
432443 myhtml_destroy(myhtml)
433444
0 commit comments