Skip to content

Commit 82c955a

Browse files
committed
scripts: net: Remove dependency on lxml in enumerate_http_status.py
Using the third-party lxml library for the very simple parsing task this script performs is unnecessary, so switch to Python's built-in HTMLParser. Signed-off-by: Benjamin Cabé <[email protected]>
1 parent 45eedaa commit 82c955a

File tree

1 file changed

+31
-13
lines changed

1 file changed

+31
-13
lines changed

scripts/net/enumerate_http_status.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,43 @@
2525
HTTP_511_NETWORK_AUTHENTICATION_REQUIRED = 511, /**< Network Authentication Required */
2626
"""
2727

28-
from lxml import html
28+
from html.parser import HTMLParser
2929
import requests
3030
import re
3131

32-
page = requests.get('https://developer.mozilla.org/en-US/docs/Web/HTTP/Status')
33-
tree = html.fromstring(page.content)
32+
class HTTPStatusParser(HTMLParser):
33+
def __init__(self):
34+
super().__init__()
35+
self.status_codes = {}
36+
self.in_code_tag = False
37+
self.current_data = ""
38+
39+
def handle_starttag(self, tag, attrs):
40+
if tag == 'code':
41+
self.in_code_tag = True
42+
self.current_data = ""
43+
44+
def handle_endtag(self, tag):
45+
if tag == 'code' and self.in_code_tag:
46+
self.in_code_tag = False
47+
if self.current_data.strip():
48+
match = re.match(r'([0-9]{3}) ([a-zA-Z].*)', self.current_data.strip())
49+
if match:
50+
code = int(match.group(1))
51+
description = match.group(2)
52+
self.status_codes[code] = description
3453

35-
codes = tree.xpath('//code/text()')
54+
def handle_data(self, data):
55+
if self.in_code_tag:
56+
self.current_data += data
57+
58+
page = requests.get('https://developer.mozilla.org/en-US/docs/Web/HTTP/Status')
3659

37-
codes2 = {}
38-
for c in codes:
39-
if re.match('[0-9][0-9][0-9] [a-zA-Z].*', c):
40-
key = int(c[0:3])
41-
val = c[4:]
42-
codes2[key] = val
60+
parser = HTTPStatusParser()
61+
parser.feed(page.text)
4362

44-
keys = sorted(codes2.keys())
45-
for key in keys:
46-
val = codes2[key]
63+
for key in sorted(parser.status_codes.keys()):
64+
val = parser.status_codes[key]
4765
enum_head = 'HTTP'
4866
enum_body = f'{key}'
4967
enum_tail = val.upper().replace(' ', '_').replace("'", '').replace('-', '_')

0 commit comments

Comments
 (0)