1+ from datetime import time
12import os
23import inspect
34import webbrowser
67import pandas as pd
78import numpy as np
89
9- from typing import Dict , List , Tuple
10+ from typing import Dict , List , Tuple , Union
1011
1112from concurrent .futures import ThreadPoolExecutor , as_completed , thread
1213
14+
# Identifies one compared series: (operation, activity, id, constituent, tcode).
OperationsTuple = Tuple[str, str, str, str, str]
# Outcome of one comparison: (no_data_hsp2, no_data_hspf, match, diff).
ResultsTuple = Tuple[bool, bool, bool, float]
17+
1318class RegressTest (object ):
1419 def __init__ (self , compare_case :str , operations :List [str ]= [], activities :List [str ]= [],
1520 tcodes :List [str ] = ['2' ], ids :List [str ] = [], threads :int = os .cpu_count () - 1 ) -> None :
@@ -38,13 +43,22 @@ def _init_files(self):
3843
def _get_hbn_data(self, test_dir: str) -> None:
    """Load every HSPF binary output (.hbn) file found for a test case.

    Scans ``<test_dir>/HSPFresults`` and reads each file whose name ends in
    ``.hbn`` (case-insensitive). The result is stored on
    ``self.hspf_data_collection``, which maps each key of a reader's
    ``output_dictionary`` to the reader that supplied it.

    Args:
        test_dir: Directory of the test case; must contain ``HSPFresults``.
    """
    sub_dir = os.path.join(test_dir, 'HSPFresults')
    self.hspf_data_collection = {}
    for file in os.listdir(sub_dir):
        if not file.lower().endswith('.hbn'):
            continue
        # sub_dir already starts with test_dir; joining test_dir a second
        # time duplicated the prefix whenever test_dir was a relative path.
        hspf_data = HBNOutput(os.path.join(sub_dir, file))
        hspf_data.read_data()
        # If two files expose the same key, the one listed later wins.
        for key in hspf_data.output_dictionary.keys():
            self.hspf_data_collection[key] = hspf_data
53+
def get_hspf_time_series(self, ops: OperationsTuple) -> Union[pd.Series, None]:
    """Fetch one HSPF time series from whichever loaded file holds it.

    Args:
        ops: ``(operation, activity, id, constituent, tcode)``.

    Returns:
        Whatever the matching reader's ``get_time_series`` returns for the
        request at the 'hourly' interval, or ``None`` when no loaded file
        registered the ``operation_activity_id_tcode`` key.
    """
    # opn_id rather than id, so the builtin is not shadowed.
    operation, activity, opn_id, constituent, tcode = ops
    key = f'{operation}_{activity}_{opn_id}_{tcode}'
    # A missing key means none of the loaded files holds this data set.
    # Report that as "no data" (None), which the return type already
    # allows, instead of raising KeyError.
    hspf_data = self.hspf_data_collection.get(key)
    if hspf_data is None:
        return None
    return hspf_data.get_time_series(operation, int(opn_id), constituent, activity, 'hourly')
60+
61+ def _get_hdf5_data (self , test_dir : str ) -> None :
4862 sub_dir = os .path .join (test_dir , 'HSP2results' )
4963 for file in os .listdir (sub_dir ):
5064 if file .lower ().endswith ('.h5' ) or file .lower ().endswith ('.hdf' ):
@@ -62,26 +76,27 @@ def should_compare(self, operation:str, activity:str, id:str, tcode:str) -> bool
6276 return False
6377 return True
6478
def generate_report(self, file: str, results: Dict[OperationsTuple, ResultsTuple]) -> None:
    """Build the HTML comparison report, save it and show it in a browser.

    Args:
        file: Path of the HTML file to write.
        results: Comparison outcomes, passed through to ``make_html_report``.
    """
    from pathlib import Path

    html = self.make_html_report(results)
    self.write_html(file, html)
    # 'file://' + file is only a valid URL for absolute POSIX paths without
    # special characters; as_uri() also copes with relative paths, Windows
    # drive letters and characters that need percent-encoding.
    webbrowser.open_new_tab(Path(file).resolve().as_uri())
6983
70- def make_html_report (self , results_dict :Dict [Tuple [ str , str , str , str , str ], Tuple [ bool , bool , bool , float ] ]) -> str :
84+ def make_html_report (self , results_dict :Dict [OperationsTuple , ResultsTuple ]) -> str :
7185 """populates html table"""
7286 style_th = 'style="text-align:left"'
7387 style_header = 'style="border:1px solid; background-color:#EEEEEE"'
7488
7589 html = f'<html><header><h1>CONVERSION TEST REPORT</h1></header><body>\n '
7690 html += f'<table style="border:1px solid">\n '
7791
78- for key in self .hspf_data . output_dictionary .keys ():
92+ for key in self .hspf_data_collection .keys ():
7993 operation , activity , opn_id , tcode = key .split ('_' )
8094 if not self .should_compare (operation , activity , opn_id , tcode ):
8195 continue
8296 html += f'<tr><th colspan=5 { style_header } >{ key } </th></tr>\n '
8397 html += f'<tr><th></th><th { style_th } >Constituent</th><th { style_th } >Max Diff</th><th>Match</th><th>Note</th></tr>\n '
84- for cons in self .hspf_data .output_dictionary [key ]:
98+ hspf_data = self .hspf_data_collection [key ]
99+ for cons in hspf_data .output_dictionary [key ]:
85100 result = results_dict [(operation ,activity ,opn_id , cons , tcode )]
86101 no_data_hsp2 , no_data_hspf , match , diff = result
87102 html += self .make_html_comp_row (cons , no_data_hsp2 , no_data_hspf , match , diff )
@@ -110,16 +125,17 @@ def write_html(self, file:str, html:str) -> None:
110125 with open (file , 'w' ) as f :
111126 f .write (html )
112127
113- def run_test (self ) -> Dict [Tuple [ str , str , str , str , str ], Tuple [ bool , bool , bool , float ] ]:
128+ def run_test (self ) -> Dict [OperationsTuple , ResultsTuple ]:
114129 futures = {}
115130 results_dict = {}
116131
117132 with ThreadPoolExecutor (max_workers = self .threads ) as executor :
118- for key in self .hspf_data . output_dictionary .keys ():
133+ for key in self .hspf_data_collection .keys ():
119134 (operation , activity , opn_id , tcode ) = key .split ('_' )
120135 if not self .should_compare (operation , activity , opn_id , tcode ):
121136 continue
122- for cons in self .hspf_data .output_dictionary [key ]:
137+ hspf_data = self .hspf_data_collection [key ]
138+ for cons in hspf_data .output_dictionary [key ]:
123139 params = (operation ,activity ,opn_id ,cons ,tcode )
124140 futures [executor .submit (self .check_con , params )] = params
125141
@@ -129,13 +145,13 @@ def run_test(self) -> Dict[Tuple[str,str,str,str,str],Tuple[bool,bool,bool,float
129145
130146 return results_dict
131147
132- def check_con (self , params :Tuple [ str , str , str , str , str ] ) -> Tuple [ bool , bool , bool , float ] :
148+ def check_con (self , params :OperationsTuple ) -> ResultsTuple :
133149 """Performs comparision of single constituent"""
134150 operation , activity , id , constituent , tcode = params
135151 print (f' { operation } _{ id } { activity } { constituent } \n ' )
136152
137153 ts_hsp2 = self .hsp2_data .get_time_series (operation , id , constituent , activity )
138- ts_hspf = self .hspf_data . get_time_series ( operation , int ( id ), constituent , activity , 'hourly' )
154+ ts_hspf = self .get_hspf_time_series ( params )
139155
140156 no_data_hsp2 = ts_hsp2 is None
141157 no_data_hspf = ts_hspf is None
@@ -157,22 +173,20 @@ def check_con(self, params:Tuple[str,str,str,str,str]) -> Tuple[bool,bool,bool,f
157173 elif constituent == 'QTOTAL' or constituent == 'HTEXCH' :
158174 tolerance = max (abs (ts_hsp2 .values .min ()), abs (ts_hsp2 .values .max ())) * 1e-3
159175
160- ts_hsp2 , ts_hspf = self .validate_time_series (ts_hsp2 , ts_hspf ,
161- self .hsp2_data , self .hspf_data , operation , activity , id , constituent )
176+ ts_hsp2 , ts_hspf = self .validate_time_series (ts_hsp2 , ts_hspf , operation , activity , id , constituent )
162177
163178 match , diff = self .compare_time_series (ts_hsp2 , ts_hspf , tolerance )
164179
165180 return (no_data_hsp2 , no_data_hspf , match , diff )
166181
def fill_nan_and_null(self, timeseries: pd.Series, replacement_value: float = 0.0) -> pd.Series:
    """Return a copy of the series with missing values swapped out.

    Both NaN entries and entries not greater than -1.0e30 (the HSPF null
    value) are set to ``replacement_value``; the input is left untouched.
    """
    without_nan = timeseries.fillna(replacement_value)
    is_real_value = without_nan > -1.0e30
    return without_nan.where(is_real_value, replacement_value)
172187
173- def validate_time_series (self , ts_hsp2 :pd .Series , ts_hspf :pd .Series ,
174- hsp2_data :HDF5 , hspf_data :HBNOutput , operation :str , activity :str ,
175- id :str , cons :str ) -> Tuple [pd .Series , pd .Series ]:
188+ def validate_time_series (self , ts_hsp2 :pd .Series , ts_hspf :pd .Series , operation :str ,
189+ activity :str , id :str , cons :str ) -> Tuple [pd .Series , pd .Series ]:
176190 """ validates a corrects time series to avoid false differences """
177191
178192 # In some test cases it looked like HSP2 was executing for a single extra time step
@@ -187,8 +201,11 @@ def validate_time_series(self, ts_hsp2:pd.Series, ts_hspf:pd.Series,
187201 ### special cases
188202 # if tiny suro in one and no suro in the other, don't trigger on suro-dependent numbers
189203 if activity == 'PWTGAS' and cons in ['SOTMP' , 'SODOX' , 'SOCO2' ]:
190- ts_suro_hsp2 = hsp2_data .get_time_series (operation , id , "SURO" , "PWATER" )
191- ts_suro_hspf = hspf_data .get_time_series (operation , int (id ), "SURO" , "PWATER" , 'hourly' )
204+ ts_suro_hsp2 = self .hsp2_data .get_time_series (operation , id , 'SURO' , 'PWATER' )
205+ ts_suro_hsp2 = self .fill_nan_and_null (ts_suro_hsp2 )
206+ ts_suro_hspf = self .get_hspf_time_series ((operation , 'PWATER' , id , 'SURO' , 2 ))
207+ ts_suro_hsp2 = self .fill_nan_and_null (ts_suro_hspf )
208+
192209
193210 idx_zero_suro_hsp2 = ts_suro_hsp2 == 0
194211 idx_low_suro_hsp2 = ts_suro_hsp2 < 1.0e-8
@@ -200,7 +217,8 @@ def validate_time_series(self, ts_hsp2:pd.Series, ts_hspf:pd.Series,
200217
201218 # if volume in reach is going to zero, small concentration differences are not signficant
202219 if activity == 'SEDTRN' and cons in ['SSEDCLAY' , 'SSEDTOT' ]:
203- ts_vol_hsp2 = hsp2_data .get_time_series (operation , id , "VOL" , "HYDR" )
220+ ts_vol_hsp2 = self .hsp2_data .get_time_series (operation , id , "VOL" , "HYDR" )
221+ ts_vol_hsp2 = self .fill_nan_and_null (ts_vol_hsp2 )
204222
205223 idx_low_vol = ts_vol_hsp2 < 1.0e-4
206224 ts_hsp2 .loc [idx_low_vol ] = ts_hsp2 .loc [idx_low_vol ] = 0
0 commit comments