@@ -16,10 +16,7 @@ def download_data(zipname: str, urllink: str, datapath: str = "data/") -> List[p
     path_zip_ext = path_zip + ".zip"
     url = os.path.join(urllink, zipname) + ".zip"
     os.makedirs(datapath, exist_ok=True)
-    print("exists ?", path_zip_ext)
     if not os.path.exists(path_zip_ext) and not os.path.exists(path_zip):
-        print("url")
-        print(url)
         request.urlretrieve(url, path_zip_ext)
     if not os.path.exists(path_zip):
         with zipfile.ZipFile(path_zip_ext, "r") as zip_ref:
@@ -132,8 +129,9 @@ def preprocess_data_beijing(df: pd.DataFrame) -> pd.DataFrame:
     df["datetime"] = pd.to_datetime(df[["year", "month", "day", "hour"]])
     df["station"] = "Beijing"
     df.set_index(["station", "datetime"], inplace=True)
-    # df.drop(columns=["year", "month", "day", "hour", "No", "cbwd", ""], inplace=True)
-    df.drop(columns=["year", "month", "day", "hour", "wd", "No"], inplace=True)
+    df.drop(
+        columns=["year", "month", "day", "hour", "No", "cbwd", "Iws", "Is", "Ir"], inplace=True
+    )
     df.sort_index(inplace=True)
     df = df.groupby(
         ["station", df.index.get_level_values("datetime").floor("d")], group_keys=False
@@ -154,8 +152,6 @@ def preprocess_data_beijing_offline(df: pd.DataFrame) -> pd.DataFrame:
     pd.DataFrame
         preprocessed dataframe
     """
-    print("preprocess_data_beijing_offline")
-    print(df.dtypes)
     df["datetime"] = pd.to_datetime(df[["year", "month", "day", "hour"]])
     df.set_index(["station", "datetime"], inplace=True)
     df.drop(columns=["year", "month", "day", "hour", "wd", "No"], inplace=True)
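
Below is a minimal usage sketch of the two functions touched by this commit. It assumes `download_data` returns the paths of the extracted files and that the archive holds a single CSV of the Beijing records; the dataset name, URL, and import path are placeholders, not values from the repository:

```python
import pandas as pd

# Placeholder import: the actual module path is not shown in this diff.
# from <package>.data import download_data, preprocess_data_beijing

# Hypothetical dataset name and mirror URL, for illustration only.
paths = download_data("PRSA_data", "https://example.com/datasets", datapath="data/")

# Assume the extracted archive contains one CSV with the raw hourly records.
raw = pd.read_csv(paths[0])

# With this commit, preprocess_data_beijing drops the raw date parts together with
# "No", "cbwd", "Iws", "Is" and "Ir", indexes the frame by (station, datetime),
# and regroups it by station and calendar day.
daily = preprocess_data_beijing(raw)
```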