Minor improvements: validate tag names, check Stack allocation, and harden HTMLParser error handling and teardown

rushter · rushter · commit 823859a0cb99 · 2025-05-28T21:02:18.000+04:00
diff --git a/selectolax/lexbor.pyx b/selectolax/lexbor.pyx
@@ -96,6 +96,12 @@ cdef class LexborHTMLParser:
         name : str (e.g. div)
 
         """
+
+        if not name:
+            raise ValueError("Tag name cannot be empty")
+        if len(name) > 100:  # Reasonable limit for tag names
+            raise ValueError("Tag name is too long")
+
         cdef lxb_dom_collection_t* collection = NULL
         cdef lxb_status_t status
         pybyte_name = name.encode('UTF-8')
diff --git a/selectolax/modest/node.pxi b/selectolax/modest/node.pxi
@@ -14,6 +14,8 @@ cdef class Stack:
         self.capacity = capacity
         self.top = 0
         self._stack = <myhtml_tree_node_t**> malloc(capacity * sizeof(myhtml_tree_node_t))
+        if self._stack == NULL:
+            raise MemoryError("Failed to allocate memory for stack")
 
     def __dealloc__(self):
         free(self._stack)
@@ -595,7 +597,7 @@ cdef class Node:
         >>> tree.body.unwrap_tags(['i','a'])
         >>> tree.body.html
         '<body><div>Hello world!</div></body>'
-        
+
         Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
         """
 
@@ -783,7 +785,7 @@ cdef class Node:
         >>> tree.body.unwrap_tags(['i','a'])
         >>> tree.body.html
         '<body><div>Hello world!</div></body>'
-        
+
         Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
         """
 
diff --git a/selectolax/parser.pyx b/selectolax/parser.pyx
@@ -124,7 +124,7 @@ cdef class HTMLParser:
             status = myhtml_parse(self.html_tree, self._encoding, html, html_len)
 
         if status != 0:
-            raise RuntimeError("Can't parse HTML:\n%s" % str(html))
+            raise RuntimeError("Can't parse HTML (status code: %d)" % status)
 
         assert self.html_tree.node_html != NULL
 
@@ -147,9 +147,13 @@ cdef class HTMLParser:
     def root(self):
         """Returns root node."""
         if self.html_tree and self.html_tree.node_html:
-            node = Node()
-            node._init(self.html_tree.node_html, self)
-            return node
+            try:
+                node = Node()
+                node._init(self.html_tree.node_html, self)
+                return node
+            except Exception:
+                # If Node creation or initialization fails, return None
+                return None
         return None
 
     @property
@@ -185,6 +189,12 @@ cdef class HTMLParser:
         name : str (e.g. div)
 
         """
+        # Validate tag name
+        if not name:
+            raise ValueError("Tag name cannot be empty")
+        if len(name) > 100:  # Reasonable limit for tag names
+            raise ValueError("Tag name is too long")
+
         cdef myhtml_collection_t* collection = NULL
         pybyte_name = name.encode('UTF-8')
         cdef mystatus_t status = 0;
@@ -428,6 +438,7 @@ cdef class HTMLParser:
         if self.html_tree != NULL:
             myhtml = self.html_tree.myhtml
             myhtml_tree_destroy(self.html_tree)
+            self.html_tree = NULL  # Prevent double-free
             if myhtml != NULL:
                 myhtml_destroy(myhtml)