11import codecs
2+ import os
23
34import black # type: ignore
45from bs4 import BeautifulSoup , Comment , Doctype , NavigableString , Tag # type: ignore
@@ -122,10 +123,12 @@ def parsehtml(html, formatting, compact):
122123html = hg.BaseElement(""" ,
123124 ]
124125
125- soup = BeautifulSoup (
126- html ,
127- "lxml" ,
128- )
126+ if os .name == "nt" :
127+ parser = "html.parser"
128+ else :
129+ parser = "lxml"
130+
131+ soup = BeautifulSoup (html , parser )
129132 for subtag in soup .contents :
130133 tags = convert (subtag , 1 , compact )
131134 if tags :
@@ -146,6 +149,7 @@ def main():
146149
147150 formatflag = "--no-formatting"
148151 compactflag = "--compact"
152+ encodingflag = "--encoding"
149153
150154 files = sys .argv [1 :]
151155 formatting = formatflag not in files
@@ -154,10 +158,15 @@ def main():
154158 files .remove (formatflag )
155159 if compactflag in files :
156160 files .remove (compactflag )
161+ if encodingflag in files :
162+ encoding = files [files .index (encodingflag ) + 1 ]
163+ files .remove (encodingflag )
164+ files .remove (encoding )
165+
157166 if not files :
158167 print (parsehtml (sys .stdin .read (), formatting , compact ), end = "" )
159168 for _file in files :
160- with open (_file ) as rf :
169+ with open (_file , encoding = encoding ) as rf :
161170 with open (_file + ".py" , "w" ) as wf :
162171 wf .write (parsehtml (rf .read (), formatting , compact ))
163172
0 commit comments