@@ -16,10 +16,7 @@ def download_data(zipname: str, urllink: str, datapath: str = "data/") -> List[p
     path_zip_ext = path_zip + ".zip"
     url = os.path.join(urllink, zipname) + ".zip"
     os.makedirs(datapath, exist_ok=True)
-    print("exists ?", path_zip_ext)
     if not os.path.exists(path_zip_ext) and not os.path.exists(path_zip):
-        print("url")
-        print(url)
         request.urlretrieve(url, path_zip_ext)
     if not os.path.exists(path_zip):
         with zipfile.ZipFile(path_zip_ext, "r") as zip_ref:
@@ -132,8 +129,9 @@ def preprocess_data_beijing(df: pd.DataFrame) -> pd.DataFrame:
     df["datetime"] = pd.to_datetime(df[["year", "month", "day", "hour"]])
     df["station"] = "Beijing"
     df.set_index(["station", "datetime"], inplace=True)
-    # df.drop(columns=["year", "month", "day", "hour", "No", "cbwd", ""], inplace=True)
-    df.drop(columns=["year", "month", "day", "hour", "wd", "No"], inplace=True)
+    df.drop(
+        columns=["year", "month", "day", "hour", "No", "cbwd", "Iws", "Is", "Ir"], inplace=True
+    )
     df.sort_index(inplace=True)
     df = df.groupby(
         ["station", df.index.get_level_values("datetime").floor("d")], group_keys=False
@@ -154,8 +152,6 @@ def preprocess_data_beijing_offline(df: pd.DataFrame) -> pd.DataFrame:
     pd.DataFrame
         preprocessed dataframe
     """
-    print("preprocess_data_beijing_offline")
-    print(df.dtypes)
     df["datetime"] = pd.to_datetime(df[["year", "month", "day", "hour"]])
     df.set_index(["station", "datetime"], inplace=True)
     df.drop(columns=["year", "month", "day", "hour", "wd", "No"], inplace=True)
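
Below is a minimal usage sketch of the two functions touched by this commit. It assumes `download_data` returns the paths of the extracted files and that the archive holds a single CSV of the Beijing records; the dataset name, URL, and import path are placeholders, not values from the repository:

```python
import pandas as pd

# Placeholder import: the actual module path is not shown in this diff.
# from <package>.data import download_data, preprocess_data_beijing

# Hypothetical dataset name and mirror URL, for illustration only.
paths = download_data("PRSA_data", "https://example.com/datasets", datapath="data/")

# Assume the extracted archive contains one CSV with the raw hourly records.
raw = pd.read_csv(paths[0])

# With this commit, preprocess_data_beijing drops the raw date parts together with
# "No", "cbwd", "Iws", "Is" and "Ir", indexes the frame by (station, datetime),
# and regroups it by station and calendar day.
daily = preprocess_data_beijing(raw)
```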