|
25 | 25 | HTTP_511_NETWORK_AUTHENTICATION_REQUIRED = 511, /**< Network Authentication Required */
|
26 | 26 | """
|
27 | 27 |
|
28 |
| -from lxml import html |
| 28 | +from html.parser import HTMLParser |
29 | 29 | import requests
|
30 | 30 | import re
|
31 | 31 |
|
32 |
| -page = requests.get('https://developer.mozilla.org/en-US/docs/Web/HTTP/Status') |
33 |
| -tree = html.fromstring(page.content) |
class HTTPStatusParser(HTMLParser):
    """Collect HTTP status codes from the ``<code>`` elements of an HTML page.

    Feed a document with ``feed()``; afterwards ``status_codes`` maps each
    integer status code to its reason phrase, e.g. ``{404: 'Not Found'}``.
    Only ``<code>`` elements whose text looks like ``"<3 digits> <phrase>"``
    are recorded.  If the same code occurs more than once, the last
    occurrence wins.
    """

    # Three digits, one space, then a phrase starting with a letter.  The
    # pattern is applied to whitespace-normalised text, so a single literal
    # space is enough and ``.*`` never has to cross a line break.
    _STATUS_RE = re.compile(r'([0-9]{3}) ([a-zA-Z].*)')

    def __init__(self):
        super().__init__()
        self.status_codes = {}     # int status code -> reason phrase
        self.in_code_tag = False   # True between <code> and </code>
        self.current_data = ""     # text gathered for the open <code>

    def handle_starttag(self, tag, attrs):
        """Start a fresh text buffer whenever a ``<code>`` element opens."""
        if tag == 'code':
            self.in_code_tag = True
            self.current_data = ""

    def handle_endtag(self, tag):
        """On ``</code>``, record the gathered text if it is a status line."""
        if tag != 'code' or not self.in_code_tag:
            return
        self.in_code_tag = False
        # Collapse every run of whitespace (newlines, repeated or
        # non-breaking spaces) into one space.  Without this, text wrapped
        # across lines was truncated at the line break and text separated
        # by anything other than one plain space was dropped.
        text = ' '.join(self.current_data.split())
        match = self._STATUS_RE.match(text)
        if match:
            self.status_codes[int(match.group(1))] = match.group(2)

    def handle_data(self, data):
        """Append text to the buffer while inside a ``<code>`` element."""
        if self.in_code_tag:
            self.current_data += data
| 57 | + |
# Download the MDN reference page that lists the HTTP response status codes.
# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() aborts on an HTTP error instead of silently parsing
# an error page into an empty table.
page = requests.get(
    'https://developer.mozilla.org/en-US/docs/Web/HTTP/Status',
    timeout=30,
)
page.raise_for_status()

# Extract the {status code: reason phrase} table from the <code> elements.
parser = HTTPStatusParser()
parser.feed(page.text)
parser.close()  # force processing of any text the parser is still buffering
43 | 62 |
|
44 |
| -keys = sorted(codes2.keys()) |
45 |
| -for key in keys: |
46 |
| - val = codes2[key] |
| 63 | +for key in sorted(parser.status_codes.keys()): |
| 64 | + val = parser.status_codes[key] |
47 | 65 | enum_head = 'HTTP'
|
48 | 66 | enum_body = f'{key}'
|
49 | 67 | enum_tail = val.upper().replace(' ', '_').replace("'", '').replace('-', '_')
|
|
0 commit comments