@@ -15,7 +15,7 @@ def create_output_dirs(output_dir):
1515 (output_dir / d ).mkdir (parents = True , exist_ok = True )
1616
1717
18- def make_cell_output_dir (output_dir , sub_dir , lat , lon , variable = None ):
18+ def make_cell_output_dir (output_dir , sub_dir , lat , lon , variable ):
1919
2020 """ params: output_dir: a pathlib object """
2121
@@ -29,7 +29,7 @@ def make_cell_output_dir(output_dir, sub_dir, lat, lon, variable=None):
2929 return lat_sub_dir
3030
3131
32- def get_valid_subset (df , subset , seed ):
32+ def get_subset (df , subset , seed ):
3333
3434 orig_len = len (df )
3535 if subset > 1 :
@@ -38,23 +38,10 @@ def get_valid_subset(df, subset, seed):
3838 df = df .loc [np .sort (subselect ), :].copy ()
3939
4040 df .replace ([np .inf , - np .inf ], np .nan , inplace = True )
41- df_valid = df .dropna (axis = 0 , how = "any" )
4241
43- print (len (df_valid ), "data points used from originally" , orig_len , "datapoints." )
42+ print (len (df ), "data points used from originally" , orig_len , "datapoints." )
4443
45- return df_valid
46-
47-
48- # def get_valid_index(df, subset, seed):
49-
50- # orig_len = len(df)
51- # if subset > 1:
52- # np.random.seed(seed)
53- # subselect = np.random.choice(orig_len, np.int(orig_len/subset), replace=False)
54- # df = df.loc[np.sort(subselect), :].copy()
55-
56- # df.replace([np.inf, -np.inf], np.nan, inplace=True)
57- # return df.dropna(axis=0, how="any").index
44+ return df
5845
5946
6047def create_dataframe (nct_array , units , data_to_detrend , gmt , variable ):
@@ -101,40 +88,26 @@ def create_dataframe(nct_array, units, data_to_detrend, gmt, variable):
10188 return tdf , datamin , scale
10289
10390
def create_ref_df(df, trace_for_qm, ref_period, params):
    """Average traced parameters and attach day-of-year reference values.

    For every name ``p`` in ``params`` the result holds two columns:

    * ``p`` -- ``trace_for_qm[p].mean(axis=0)``, one value per row of ``df``
      (presumably the mean over posterior samples -- confirm against the
      caller that axis 0 is the sample axis).
    * ``p + "_ref"`` -- the mean of ``p`` over all rows inside ``ref_period``
      that share the row's day of year.

    Args:
        df: DataFrame with a ``"ds"`` column of timestamps; its length must
            match the length of each averaged trace.
        trace_for_qm: mapping from parameter name to an array-like that
            supports ``.mean(axis=0)``.
        ref_period: two-element sequence ``(start, end)`` used to slice the
            datetime index (label-based slice, both ends included).
        params: iterable of parameter names to process.

    Returns:
        DataFrame indexed by ``df["ds"]`` with the columns described above.
        Days of the year that do not occur inside ``ref_period`` get NaN in
        the ``*_ref`` columns.
    """
    # params is walked twice below; materialise it so generators work too.
    params = list(params)

    df_params = pd.DataFrame(index=df.index)
    for p in params:
        df_params.loc[:, p] = trace_for_qm[p].mean(axis=0)

    df_params.index = df["ds"]
    day_of_year = df_params.index.dayofyear

    df_params_ref = df_params.loc[ref_period[0] : ref_period[1]]
    # mean over all years for each day
    df_params_ref = df_params_ref.groupby(df_params_ref.index.dayofyear).mean()

    # Broadcast the per-day reference means onto the whole time series in one
    # lookup instead of masking the full index once per day and parameter.
    ref_on_days = df_params_ref.reindex(day_of_year)
    for p in params:
        df_params[p + "_ref"] = ref_on_days[p].to_numpy()

    return df_params
140113
@@ -161,64 +134,36 @@ def get_source_timeseries(data_dir, dataset, qualifier, variable, lat, lon):
161134 obs_data .close ()
162135 return df
163136
def get_cell_filename(outdir_for_cell, lat, lon, settings):
    """Return the time-series file path for one grid cell.

    Args:
        outdir_for_cell: directory for the cell; must support the ``/``
            operator with a string (e.g. a ``pathlib.Path``).
        lat: latitude of the cell, rendered with ``str()``.
        lon: longitude of the cell, rendered with ``str()``.
        settings: object providing the string attributes ``dataset`` and
            ``storage_format`` (the latter is appended verbatim, so it is
            expected to include the leading dot).

    Returns:
        ``outdir_for_cell / "ts_<dataset>_lat<lat>_lon<lon><storage_format>"``.
    """
    return outdir_for_cell / (
        f"ts_{settings.dataset}_lat{lat!s}_lon{lon!s}{settings.storage_format}"
    )
170142
171- fname = outdir_for_cell / (
172- "ts_" + settings .dataset + "_lat" + str (lat ) + "_lon" + str (lon ) + dformat
173- )
def test_if_data_valid_exists(fname):
    """Check that ``fname`` can be read back with pandas.

    The file is loaded with ``pd.read_hdf`` or ``pd.read_csv`` depending on
    its ending and the result is discarded; any exception raised by pandas
    (missing or unreadable file) propagates to the caller.

    Args:
        fname: path of the stored time series (string or path-like).

    Raises:
        ValueError: if ``fname`` ends in neither ``.h5`` nor ``.csv``.
    """
    # NOTE(review): the ``test_`` prefix makes pytest try to collect this
    # helper if it is imported into a test module -- confirm that is intended.
    name = str(fname)
    # Match on the ending only: a substring check would misroute paths such
    # as "run.h5/cell.csv" to the HDF reader.
    if name.endswith(".h5"):
        pd.read_hdf(fname)
    elif name.endswith(".csv"):
        pd.read_csv(fname)
    else:
        raise ValueError(f"unsupported storage format for {name!r}; expected .h5 or .csv")
181151
182- print ( "Saved timeseries to " , fname )
def save_to_disk(df_with_cfact, fname, lat, lon, storage_format):
    """Write a cell's time series to ``fname`` as CSV or HDF5.

    Args:
        df_with_cfact: DataFrame to store.
        fname: destination path.
        lat: latitude of the cell; used only to build the HDF5 key.
        lon: longitude of the cell; used only to build the HDF5 key.
        storage_format: ``".csv"`` or ``".h5"``.

    Raises:
        NotImplementedError: for any other ``storage_format``.
    """
    if storage_format == ".csv":
        df_with_cfact.to_csv(fname)
    elif storage_format == ".h5":
        # Pass the key by keyword: positional use is deprecated in pandas.
        # mode="w" overwrites any existing file.
        df_with_cfact.to_hdf(
            fname, key="lat_" + str(lat) + "_lon_" + str(lon), mode="w"
        )
    else:
        raise NotImplementedError("choose storage format .h5 or csv.")

    print("Saved timeseries to ", fname)
0 commit comments