5555
5656< body >
5757 < header >
58- < aside > December 20 , 2024</ aside >
58+ < aside > December 24 , 2024</ aside >
5959 < a href ="https://gto76.github.io " rel ="author "> Jure Šorn</ a >
6060 </ header >
6161
@@ -2052,10 +2052,9 @@ <h3 id="format-2">Format</h3><div><h4 id="forstandardtypesizesandmanualalignment
20522052table = document.find(< span class ="hljs-string "> 'table'</ span > , class_=< span class ="hljs-string "> 'infobox vevent'</ span > )
20532053python_url = table.find(< span class ="hljs-string "> 'th'</ span > , text=< span class ="hljs-string "> 'Website'</ span > ).next_sibling.a[< span class ="hljs-string "> 'href'</ span > ]
20542054logo_url = table.find(< span class ="hljs-string "> 'img'</ span > )[< span class ="hljs-string "> 'src'</ span > ]
2055- logo = requests.get(< span class ="hljs-string "> f'https:< span class ="hljs-subst "> {logo_url}</ span > '</ span > ).content
20562055filename = os.path.basename(logo_url)
20572056< span class ="hljs-keyword "> with</ span > open(filename, < span class ="hljs-string "> 'wb'</ span > ) < span class ="hljs-keyword "> as</ span > file:
2058- file.write(logo )
2057+ file.write(requests.get( < span class =" hljs-string " > f'https: < span class =" hljs-subst " > {logo_url} </ span > ' </ span > ).content )
20592058print(< span class ="hljs-string "> f'< span class ="hljs-subst "> {python_url}</ span > , file://< span class ="hljs-subst "> {os.path.abspath(filename)}</ span > '</ span > )
20602059</ code > </ pre > </ div > </ div >
20612060
@@ -2065,6 +2064,7 @@ <h3 id="format-2">Format</h3><div><h4 id="forstandardtypesizesandmanualalignment
20652064
20662065<WebDrv> = webdriver.Chrome/Firefox/Safari/Edge() < span class ="hljs-comment "> # Opens a browser. Also <WebDrv>.quit().</ span >
20672066<WebDrv>.get(< span class ="hljs-string "> '<url>'</ span > ) < span class ="hljs-comment "> # Also <WebDrv>.implicitly_wait(seconds).</ span >
2067+ <str> = <WebDrv>.page_source < span class ="hljs-comment "> # Returns HTML of fully rendered page.</ span >
20682068<El> = <WebDrv/El>.find_element(< span class ="hljs-string "> 'css selector'</ span > , …) < span class ="hljs-comment "> # '<tag>#<id>.<class>[<attr>="<val>"]…'.</ span >
20692069<list> = <WebDrv/El>.find_elements(< span class ="hljs-string "> 'xpath'</ span > , …) < span class ="hljs-comment "> # '//<tag>[@<attr>="<val>"]…'. See XPath.</ span >
20702070<str> = <El>.get_attribute(<str>) < span class ="hljs-comment "> # Property if exists. Also <El>.text.</ span >
@@ -2805,32 +2805,41 @@ <h3 id="format-2">Format</h3><div><h4 id="forstandardtypesizesandmanualalignment
28052805
28062806
28072807
2808- < div > < h4 id ="displaysamultiaxislinechartoftotalcoronaviruscasesandchangesinpricesofbitcoindowjonesandgold "> Displays a multi-axis line chart of total coronavirus cases and changes in prices of Bitcoin, Dow Jones and gold:</ h4 > < p > </ p > < div id ="e23ccacc-a456-478b-b467-7282a2165921 " class ="plotly-graph-div " style ="height:287px; width:935px; "> </ div > < pre > < code class ="python language-python hljs "> < span class ="hljs-keyword "> import</ span > pandas < span class ="hljs-keyword "> as</ span > pd, plotly.graph_objects < span class ="hljs-keyword "> as</ span > go
2808+ < div > < h4 id ="displaysamultiaxislinechartoftotalcoronaviruscasesandchangesinpricesofbitcoindowjonesandgold "> Displays a multi-axis line chart of total coronavirus cases and changes in prices of Bitcoin, Dow Jones and gold:</ h4 > < p > </ p > < div id ="e23ccacc-a456-478b-b467-7282a2165921 " class ="plotly-graph-div " style ="height:287px; width:935px; "> </ div > < pre > < code class ="python language-python hljs "> < span class ="hljs-comment "> # $ pip3 install pandas selenium plotly lxml</ span >
2809+ < span class ="hljs-keyword "> import</ span > pandas < span class ="hljs-keyword "> as</ span > pd, selenium.webdriver, plotly.graph_objects < span class ="hljs-keyword "> as</ span > go
2810+
28092811
28102812< span class ="hljs-function "> < span class ="hljs-keyword "> def</ span > < span class ="hljs-title "> main</ span > < span class ="hljs-params "> ()</ span > :</ span >
2811- covid, bitcoin, gold, dow = scrape_data ()
2813+ covid, ( bitcoin, gold, dow) = get_covid_cases(), get_tickers ()
28122814 df = wrangle_data(covid, bitcoin, gold, dow)
28132815 display_data(df)
28142816
2815- < span class ="hljs-function "> < span class ="hljs-keyword "> def</ span > < span class ="hljs-title "> scrape_data</ span > < span class ="hljs-params "> ()</ span > :</ span >
2816- < span class ="hljs-function "> < span class ="hljs-keyword "> def</ span > < span class ="hljs-title "> get_covid_cases</ span > < span class ="hljs-params "> ()</ span > :</ span >
2817- url = < span class ="hljs-string "> 'https://covid.ourworldindata.org/data/owid-covid-data.csv'</ span >
2818- df = pd.read_csv(url, usecols=[< span class ="hljs-string "> 'location'</ span > , < span class ="hljs-string "> 'date'</ span > , < span class ="hljs-string "> 'total_cases'</ span > ])
2819- df = df[df.location == < span class ="hljs-string "> 'World'</ span > ]
2820- < span class ="hljs-keyword "> return</ span > df.set_index(< span class ="hljs-string "> 'date'</ span > ).total_cases
2821- < span class ="hljs-function "> < span class ="hljs-keyword "> def</ span > < span class ="hljs-title "> get_ticker</ span > < span class ="hljs-params "> (symbol)</ span > :</ span >
2822- url = (< span class ="hljs-string "> f'https://query1.finance.yahoo.com/v7/finance/download/< span class ="hljs-subst "> {symbol}</ span > ?'</ span >
2823- < span class ="hljs-string "> 'period1=1579651200&period2=9999999999&interval=1d&events=history'</ span > )
2824- df = pd.read_csv(url, usecols=[< span class ="hljs-string "> 'Date'</ span > , < span class ="hljs-string "> 'Close'</ span > ])
2825- < span class ="hljs-keyword "> return</ span > df.set_index(< span class ="hljs-string "> 'Date'</ span > ).Close
2826- out = get_covid_cases(), get_ticker(< span class ="hljs-string "> 'BTC-USD'</ span > ), get_ticker(< span class ="hljs-string "> 'GC=F'</ span > ), get_ticker(< span class ="hljs-string "> '^DJI'</ span > )
2827- names = [< span class ="hljs-string "> 'Total Cases'</ span > , < span class ="hljs-string "> 'Bitcoin'</ span > , < span class ="hljs-string "> 'Gold'</ span > , < span class ="hljs-string "> 'Dow Jones'</ span > ]
2828- < span class ="hljs-keyword "> return</ span > map(pd.Series.rename, out, names)
2817+ < span class ="hljs-function "> < span class ="hljs-keyword "> def</ span > < span class ="hljs-title "> get_covid_cases</ span > < span class ="hljs-params "> ()</ span > :</ span >
2818+ url = < span class ="hljs-string "> 'https://covid.ourworldindata.org/data/owid-covid-data.csv'</ span >
2819+ df = pd.read_csv(url, usecols=[< span class ="hljs-string "> 'location'</ span > , < span class ="hljs-string "> 'date'</ span > , < span class ="hljs-string "> 'total_cases'</ span > ], parse_dates=[< span class ="hljs-string "> 'date'</ span > ])
2820+ df = df[df.location == < span class ="hljs-string "> 'World'</ span > ]
2821+ s = df.set_index(< span class ="hljs-string "> 'date'</ span > ).total_cases
2822+ < span class ="hljs-keyword "> return</ span > s.rename(< span class ="hljs-string "> 'Total Cases'</ span > )
2823+
2824+ < span class ="hljs-function "> < span class ="hljs-keyword "> def</ span > < span class ="hljs-title "> get_tickers</ span > < span class ="hljs-params "> ()</ span > :</ span >
2825+ < span class ="hljs-keyword "> with</ span > selenium.webdriver.Chrome() < span class ="hljs-keyword "> as</ span > driver:
2826+ symbols = {< span class ="hljs-string "> 'Bitcoin'</ span > : < span class ="hljs-string "> 'BTC-USD'</ span > , < span class ="hljs-string "> 'Gold'</ span > : < span class ="hljs-string "> 'GC=F'</ span > , < span class ="hljs-string "> 'Dow Jones'</ span > : < span class ="hljs-string "> '%5EDJI'</ span > }
2827+ < span class ="hljs-keyword "> for</ span > name, symbol < span class ="hljs-keyword "> in</ span > symbols.items():
2828+ < span class ="hljs-keyword "> yield</ span > get_ticker(driver, name, symbol)
2829+
2830+ < span class ="hljs-function "> < span class ="hljs-keyword "> def</ span > < span class ="hljs-title "> get_ticker</ span > < span class ="hljs-params "> (driver, name, symbol)</ span > :</ span >
2831+ url = < span class ="hljs-string "> f'https://finance.yahoo.com/quote/< span class ="hljs-subst "> {symbol}</ span > /history/'</ span >
2832+ driver.get(url + < span class ="hljs-string "> '?period1=1579651200&period2=9999999999'</ span > )
2833+ < span class ="hljs-keyword "> if</ span > buttons := driver.find_elements(< span class ="hljs-string "> 'xpath'</ span > , < span class ="hljs-string "> '//button[@name="reject"]'</ span > ):
2834+ buttons[< span class ="hljs-number "> 0</ span > ].click()
2835+ dataframes = pd.read_html(driver.page_source, parse_dates=[< span class ="hljs-string "> 'Date'</ span > ])
2836+ s = dataframes[< span class ="hljs-number "> 0</ span > ].set_index(< span class ="hljs-string "> 'Date'</ span > ).Open
2837+ < span class ="hljs-keyword "> return</ span > s.rename(name)
28292838
28302839< span class ="hljs-function "> < span class ="hljs-keyword "> def</ span > < span class ="hljs-title "> wrangle_data</ span > < span class ="hljs-params "> (covid, bitcoin, gold, dow)</ span > :</ span >
28312840 df = pd.concat([bitcoin, gold, dow], axis=< span class ="hljs-number "> 1</ span > ) < span class ="hljs-comment "> # Creates table by joining columns on dates.</ span >
28322841 df = df.sort_index().interpolate() < span class ="hljs-comment "> # Sorts rows by date and interpolates NaN-s.</ span >
2833- df = df.loc[< span class ="hljs-string "> '2020-02-23'</ span > :] < span class ="hljs-comment "> # Discards rows before '2020-02-23' .</ span >
2842+ df = df.loc[< span class ="hljs-string "> '2020-02-23'</ span > :< span class =" hljs-string " > '2021-12-20' </ span > ] < span class ="hljs-comment "> # Keeps rows between specified dates .</ span >
28342843 df = (df / df.iloc[< span class ="hljs-number "> 0</ span > ]) * < span class ="hljs-number "> 100</ span > < span class ="hljs-comment "> # Calculates percentages relative to day 1.</ span >
28352844 df = df.join(covid) < span class ="hljs-comment "> # Adds column with covid cases.</ span >
28362845 < span class ="hljs-keyword "> return</ span > df.sort_values(df.index[< span class ="hljs-number "> -1</ span > ], axis=< span class ="hljs-number "> 1</ span > ) < span class ="hljs-comment "> # Sorts columns by last day's value.</ span >
@@ -2842,11 +2851,12 @@ <h3 id="format-2">Format</h3><div><h4 id="forstandardtypesizesandmanualalignment
28422851 trace = go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis=yaxis)
28432852 figure.add_trace(trace)
28442853 figure.update_layout(
2854+ width=< span class ="hljs-number "> 944</ span > ,
2855+ height=< span class ="hljs-number "> 423</ span > ,
28452856 yaxis1=dict(title=< span class ="hljs-string "> 'Total Cases'</ span > , rangemode=< span class ="hljs-string "> 'tozero'</ span > ),
28462857 yaxis2=dict(title=< span class ="hljs-string "> '%'</ span > , rangemode=< span class ="hljs-string "> 'tozero'</ span > , overlaying=< span class ="hljs-string "> 'y'</ span > , side=< span class ="hljs-string "> 'right'</ span > ),
2847- legend=dict(x=< span class ="hljs-number "> 1.08</ span > ),
2848- width=< span class ="hljs-number "> 944</ span > ,
2849- height=< span class ="hljs-number "> 423</ span >
2858+ colorway=[< span class ="hljs-string "> '#EF553B'</ span > , < span class ="hljs-string "> '#636EFA'</ span > , < span class ="hljs-string "> '#00CC96'</ span > , < span class ="hljs-string "> '#FFA152'</ span > ],
2859+ legend=dict(x=< span class ="hljs-number "> 1.08</ span > )
28502860 )
28512861 figure.show()
28522862
@@ -2924,7 +2934,7 @@ <h3 id="format-2">Format</h3><div><h4 id="forstandardtypesizesandmanualalignment
29242934
29252935
29262936 < footer >
2927- < aside > December 20 , 2024</ aside >
2937+ < aside > December 24 , 2024</ aside >
29282938 < a href ="https://gto76.github.io " rel ="author "> Jure Šorn</ a >
29292939 </ footer >
29302940
0 commit comments