55from unstructured .documents .elements import Element
66from unstructured .documents .html import HTMLDocument
77from unstructured .documents .xml import VALID_PARSERS
8- from unstructured .partition .common import add_element_metadata , document_to_element_list
8+ from unstructured .partition .common import (
9+ add_element_metadata ,
10+ document_to_element_list ,
11+ exactly_one ,
12+ )
913
1014
1115def partition_html (
@@ -40,16 +44,16 @@ def partition_html(
4044 parser
4145 The parser to use for parsing the HTML document. If None, default parser will be used.
4246 """
43- if not any ([ filename , file , text , url ]):
44- raise ValueError ( "One of filename, file, or text must be specified." )
47+ # Verify that only one of the arguments was provided
48+ exactly_one ( filename = filename , file = file , text = text , url = url )
4549
4650 if not encoding :
4751 encoding = "utf-8"
4852
49- if filename is not None and not file and not text and not url :
53+ if filename is not None :
5054 document = HTMLDocument .from_file (filename , parser = parser , encoding = encoding )
5155
52- elif file is not None and not filename and not text and not url :
56+ elif file is not None :
5357 file_content = file .read ()
5458 if isinstance (file_content , bytes ):
5559 file_text = file_content .decode (encoding )
@@ -58,11 +62,11 @@ def partition_html(
5862
5963 document = HTMLDocument .from_string (file_text , parser = parser )
6064
61- elif text is not None and not filename and not file and not url :
65+ elif text is not None :
6266 _text : str = str (text )
6367 document = HTMLDocument .from_string (_text , parser = parser )
6468
65- elif url is not None and not filename and not file and not text :
69+ elif url is not None :
6670 response = requests .get (url )
6771 if not response .ok :
6872 raise ValueError (f"URL return an error: { response .status_code } " )
@@ -73,9 +77,6 @@ def partition_html(
7377
7478 document = HTMLDocument .from_string (response .text , parser = parser )
7579
76- else :
77- raise ValueError ("Only one of filename, file, or text can be specified." )
78-
7980 layout_elements = document_to_element_list (document , include_page_breaks = include_page_breaks )
8081 if include_metadata :
8182 return add_element_metadata (
0 commit comments