|
1 | 1 | """ |
2 | | -This is to show simple COVID19 info fetching from worldometers archive site using lxml |
3 | | -* The main motivation to use lxml in place of bs4 is that it is faster and therefore |
4 | | -more convenient to use in Python web projects (e.g. Django or Flask-based) |
| 2 | +This script demonstrates fetching simple COVID-19 statistics from an Internet Archive (Wayback Machine) snapshot of the Worldometers site using lxml. |
| 3 | +lxml is chosen over BeautifulSoup for its speed and convenience in Python web projects (such as Django or Flask). |
5 | 4 | """ |
6 | 5 |
|
7 | 6 | # /// script |
|
12 | 11 | # ] |
13 | 12 | # /// |
14 | 13 |
|
15 | | -from typing import NamedTuple |
16 | 14 |
|
| 15 | +from typing import NamedTuple |
17 | 16 | import httpx |
18 | 17 | from lxml import html |
19 | 18 |
|
20 | | - |
21 | 19 | class CovidData(NamedTuple): |
22 | 20 | cases: str |
23 | 21 | deaths: str |
24 | 22 | recovered: str |
25 | 23 |
|
26 | | - |
27 | 24 | def covid_stats( |
28 | 25 | url: str = "https://web.archive.org/web/20250825095350/https://www.worldometers.info/coronavirus/", |
29 | 26 | ) -> CovidData: |
30 | 27 | xpath_str = '//div[@class = "maincounter-number"]/span/text()' |
31 | | - return CovidData( |
32 | | - *html.fromstring(httpx.get(url, timeout=10).content).xpath(xpath_str) |
33 | | - ) |
34 | | - |
35 | | - |
36 | | -fmt = """Total COVID-19 cases in the world: {} |
| 28 | + try: |
| 29 | + response = httpx.get(url, timeout=10) |
| 30 | + response.raise_for_status() |
| 31 | + except httpx.TimeoutException: |
| 32 | + print("Request timed out. Please check your network connection or try again later.") |
| 33 | + return CovidData("N/A", "N/A", "N/A") |
| 34 | + except httpx.HTTPStatusError as e: |
| 35 | + print(f"HTTP error occurred: {e}") |
| 36 | + return CovidData("N/A", "N/A", "N/A") |
| 37 | + except Exception as e: |
| 38 | + print(f"An unexpected error occurred: {e}") |
| 39 | + return CovidData("N/A", "N/A", "N/A") |
| 40 | + data = html.fromstring(response.content).xpath(xpath_str) |
| 41 | + if len(data) != 3: |
| 42 | + print("Unexpected data format. The page structure may have changed.") |
| 43 | + return CovidData("N/A", "N/A", "N/A") |
| 44 | + return CovidData(*data) |
| 45 | + |
| 46 | +if __name__ == "__main__": |
| 47 | + fmt = """Total COVID-19 cases in the world: {} |
37 | 48 | Total deaths due to COVID-19 in the world: {} |
38 | 49 | Total COVID-19 patients recovered in the world: {}""" |
39 | | -print(fmt.format(*covid_stats())) |
| 50 | + print(fmt.format(*covid_stats())) |
0 commit comments