@@ -102,7 +102,8 @@ def process_chunk_of_files(
102102 overwrite_output : bool ,
103103 nwm_version : str ,
104104 variable_mapper : Dict [str , Dict [str , str ]],
105- timeseries_type : TimeseriesTypeEnum
105+ timeseries_type : TimeseriesTypeEnum ,
106+ drop_overlapping_assimilation_values : bool
106107):
107108 """Assemble a table for a chunk of NWM files."""
108109 location_ids = np .array (location_ids ).astype (int )
@@ -156,6 +157,12 @@ def process_chunk_of_files(
156157 end = f"{ end_json [1 ]} T{ end_json [3 ][1 :3 ]} F{ end_json [6 ][1 :]} "
157158 filename = f"{ start } _{ end } .parquet"
158159
160+ if drop_overlapping_assimilation_values and "assim" in configuration :
161+ # Set reference_time to NaT for assimilation values
162+ df_output = output_table .to_pandas ()
163+ df_output .loc [:, REFERENCE_TIME ] = pd .NaT
164+ output_table = pa .Table .from_pandas (df_output , schema = schema )
165+
159166 write_timeseries_parquet_file (
160167 Path (output_parquet_dir , filename ),
161168 overwrite_output ,
@@ -176,7 +183,8 @@ def fetch_and_format_nwm_points(
176183 overwrite_output : bool ,
177184 nwm_version : str ,
178185 variable_mapper : Dict [str , Dict [str , str ]],
179- timeseries_type : TimeseriesTypeEnum
186+ timeseries_type : TimeseriesTypeEnum ,
187+ drop_overlapping_assimilation_values : bool
180188):
181189 """Fetch NWM point data and save as parquet files.
182190
@@ -211,6 +219,12 @@ def fetch_and_format_nwm_points(
211219 they already exist. True = overwrite; False = fail.
212220 nwm_version : str
213221 Specified NWM version.
222+ variable_mapper : Dict[str, Dict[str, str]]
223+ A mapping dictionary for variable names and units.
224+ timeseries_type : TimeseriesTypeEnum
225+ The type of timeseries being processed.
226+ drop_overlapping_assimilation_values : bool
227+ Whether to drop assimilation values that overlap in value_time.
214228 """
215229 output_parquet_dir = Path (output_parquet_dir )
216230 if not output_parquet_dir .exists ():
@@ -241,5 +255,6 @@ def fetch_and_format_nwm_points(
241255 overwrite_output ,
242256 nwm_version ,
243257 variable_mapper ,
244- timeseries_type
258+ timeseries_type ,
259+ drop_overlapping_assimilation_values
245260 )
0 commit comments