diff --git a/notebooks/covid_eda_raw.py b/notebooks/covid_eda_raw.py
index 6507b62..f67fc0f 100644
--- a/notebooks/covid_eda_raw.py
+++ b/notebooks/covid_eda_raw.py
@@ -4,18 +4,16 @@
 
 # COMMAND ----------
 
-!wget -q https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/hospitalizations/covid-hospitalizations.csv -O /tmp/covid-hospitalizations.csv
+# MAGIC %md #### Transform
 
 # COMMAND ----------
 
-# MAGIC %md #### Transform
+df = spark.sql("select * from aml_development.sample.covid_hospitalizations").toPandas()
+print(df.head())
 
 # COMMAND ----------
 
-import pandas as pd
-
 # read from /tmp, subset for USA, pivot and fill missing values
-df = pd.read_csv("/tmp/covid-hospitalizations.csv")
 df = df[df.iso_code == 'USA']\
     .pivot_table(values='value', columns='indicator', index='date')\
     .fillna(0)