Commit 8c2fc35

Generating SEF files, using OS environment variables to define the DB
1 parent e694fe1 commit 8c2fc35

7 files changed (+292 -22 lines)


DRAW-post-processing/config.py

Lines changed: 119 additions & 2 deletions
@@ -13,6 +13,123 @@
     6: (19, 20, 21, 34, 35, 48, 61, 85),
     7: (18, 40, 41, 44, 51, 52, 56, 57, 66, 80, 82, 83)}

+sef_type_to_field_id = {"atb": (5, 68),
+        "au": (47),
+        "cl": (22, 53),
+        "cd": (23),
+        "ch": (16, 54),
+        "dd": (19),
+        "e": (14),
+        "hd": (17),
+        "mslp": (7),
+        "nl": (24),
+        "p": (4, 67),
+        "p_cor": (7, 69),
+        "pr": (27, 31),
+        "ptb": (25, 28),
+        "pte": (26, 29),
+        "rh": (15, 58, 59, 73, 75, 60, 72),
+        "rrt": (66),
+        "sd": (50),
+        "rain_dur": (70),
+        "snow_dur": (71),
+        "ss": (65),
+        "ta": 9,
+        "ta_cor": (10),
+        "tb": (11),
+        "tb_cor": (12),
+        "td": (33),
+        "TGn": (62, 81),
+        "Tn": (38, 76),
+        "Tn_cor": (37, 77),
+        "Tx": (36, 78),
+        "Tx_cor": (37, 79),
+        "Tsx": (63, 64),
+        "wf": (85),
+        "w": (34, 48, 35),
+        "ws": (20),
+        "ww": (18, 52, 40, 44, 57, 56, 80, 83),
+        "w2": (51, 41, 82)
+        }
+sef_type_to_unit = {"atb": "C",
+        "au": "text",
+        "cl": "lct",
+        "cd": "dir",
+        "ch": "uct",
+        "dd": "dir",
+        "e": "hPa",
+        "hd": "dir",
+        "mslp": "hPa",
+        "nl": "okta",
+        "p": "hPa",
+        "p_cor": "hPa",
+        "pr": "mm",
+        "ptb": "hr",
+        "pte": "hr",
+        "rh": "p",
+        "rrt": "text",
+        "sd": "mm",
+        "rain_dur": "hh:mm",
+        "snow_dur": "hh:mm",
+        "ss": "mm",
+        "ta": "C",
+        "ta_cor": "C",
+        "tb": "C",
+        "tb_cor": "C",
+        "td": "C",
+        "TGn": "C",
+        "Tn": "C",
+        "Tn_cor": "C",
+        "Tx": "C",
+        "Tx_cor": "C",
+        "Tsx": "C",
+        "wf": "?",
+        "w": "miles",
+        "ws": "mph",
+        "ww": "mno",
+        "w2": "mno"
+        }
+sef_type_to_start = {"atb": "stat",
+        "au": "stat",
+        "cl": "stat",
+        "cd": "stat",
+        "ch": "stat",
+        "dd": "stat",
+        "e": "stat",
+        "hd": "stat",
+        "mslp": "stat",
+        "nl": "stat",
+        "p": "stat",
+        "p_cor": "stat",
+        "pr": "stat",
+        "ptb": "stat",
+        "pte": "stat",
+        "rh": "stat",
+        "rrt": "stat",
+        "sd": "stat",
+        "rain_dur": "stat",
+        "snow_dur": "stat",
+        "ss": "stat",
+        "ta": "stat",
+        "ta_cor": "stat",
+        "tb": "stat",
+        "tb_cor": "stat",
+        "td": "stat",
+        "TGn": "stat",
+        "Tn": "stat",
+        "Tn_cor": "stat",
+        "Tx": "stat",
+        "Tx_cor": "stat",
+        "Tsx": "stat",
+        "wf": "stat",
+        "w": "stat",
+        "ws": "stat",
+        "ww": "stat",
+        "w2": "stat"
+        }
+sef_apply_utc=False
+sef_utc_offset=5
+
 # look at 27 for inap
 # unexpected characters in a data entry (when not surrounded on either side by digits) TODO : determine if any alterations necessary for non-pressure values
 unexpected_characters = {'?', '.', '*', '&', '#', '^', '$', '(', ')', '[', ']', '{', '}', '"', '/', '@', "\\", ';'}
@@ -93,8 +210,8 @@ def possible_pressure_formats(value, for_leading_digits):
 temperature_outlier_std_factor=5

 # threshold value for which fluctuation between previous timestamp and current timestamp (for same field id) requires further investigation (phase 2)
-scalar_fluctuation_thresholds = {'01': 0.00, '02': 0.00, '03': 0.00, '04': 0.00, '05': 0.00, '06': 0.00, # TODO : fill w/ pressure_fluctuation_stats results
-                                 '07': 0.00, '08': 0.00, '09': 0.00, '10': 0.00, '11': 0.00, '12': 0.00}
+scalar_fluctuation_thresholds = {'01': 0.30, '02': 0.40, '03': 0.50, '04': 0.50, '05': 0.50, '06': 0.50, # TODO : fill w/ pressure_fluctuation_stats results
+                                 '07': 0.60, '08': 0.60, '09': 0.60, '10': 0.70, '11': 0.80, '12': 0.80}

 # amount of time (in hours) between timestamps, for which a pressure fluctuation isn't granular enough
 time_delta_limit = 12 # TODO : change to 3

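As a quick illustration of how the three new mappings fit together: an entry in sef_type_to_field_id may be a bare int (e.g. "ta": 9) or a tuple of ints, sef_type_to_unit feeds the SEF header, and only "mean"/"daily"/"total" start values correspond to a 24-hour period. A minimal sketch, assuming config is importable; the describe_sef_type helper is illustrative and not part of the commit:

import config

def describe_sef_type(sef_type):
    # entries are either a single int (e.g. "ta": 9) or a tuple of field ids
    ids = config.sef_type_to_field_id[sef_type]
    if isinstance(ids, int):
        ids = (ids,)
    unit = config.sef_type_to_unit[sef_type]
    # "stat" entries are point observations; "mean"/"daily"/"total" would map to a 24-hour period
    period = "24" if config.sef_type_to_start[sef_type] in ("mean", "daily", "total") else "0"
    return ids, unit, period

print(describe_sef_type("atb"))   # ((5, 68), 'C', '0')

Note that single-value entries written as (47) are plain ints in Python (no trailing comma), which is why sef_gen.py below branches on type(...) == int when it builds its SQL filter.
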
DRAW-post-processing/database_connection.py

Lines changed: 10 additions & 7 deletions
@@ -10,21 +10,24 @@
 phase_2_errors=[]
 duplicateless=[]

+db_user=os.environ.get('DRAW_local_db_user')
+db_passwd=os.environ.get('DRAW_local_db_pass')
+db_name=os.environ.get('DRAW_local_db_name')
+#db_name="climate_test"
+#db_name="climatedatarescueprocessed"
 # connection to copy of database on local machine
 conn = mysql.connector.connect(
     ##### FOLLOWING 3 VARIABLES TO BE CONFIGURED AS NECESSARY FOR LOCAL MACHINE: #####
-    user='',
-    password='',
-    #database='climate_test',
-    database='climatedatarescue',
+    user=db_user,
+    password=db_passwd,
+    database=db_name,
     host='localhost'
 )

 cursor = conn.cursor()

-#url = "mysql+mysqlconnector://"+os.environ.get('DRAW_local_db_user')+":"+os.environ.get('DRAW_local_db_pass')+"@localhost/climate_test"
-url = "mysql+mysqlconnector://"+'user'+":"+'password'+"@localhost/climatedatarescue"
-#url = "mysql+mysqlconnector://"+''+":"+''+"@localhost/climate_test"
+url = "mysql+mysqlconnector://"+db_user+":"+db_passwd+"@localhost/"+db_name
+#url = "mysql+mysqlconnector://"+'user'+":"+'password'+"@localhost/climatedatarescue"
 engine = sqlalchemy.create_engine(url)


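With the credentials now read from the environment, os.environ.get quietly returns None when a variable is unset, and the failure only shows up later, when mysql.connector tries to connect or the URL string is concatenated. A small pre-flight guard one could run before importing database_connection (a sketch only; check_draw_env is not part of the commit):

import os

REQUIRED_VARS = ("DRAW_local_db_user", "DRAW_local_db_pass", "DRAW_local_db_name")

def check_draw_env():
    # fail early with a readable message instead of a None-related error deeper in the import
    missing = [name for name in REQUIRED_VARS if not os.environ.get(name)]
    if missing:
        raise RuntimeError("Missing environment variables: " + ", ".join(missing))

check_draw_env()
import database_connection  # safe to import once the variables are set
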
DRAW-post-processing/execute_post_process.py

Lines changed: 11 additions & 1 deletion
@@ -32,6 +32,9 @@

 import phase2_methods as id1p2_methods
 import time
+
+import sef_gen
+
 #import argparse

 def logPerf(message):
@@ -84,7 +87,7 @@ def filter_id(pp_id, entry, phase):

     elif phase== 'outlier_removal':
         if pp_id ==1:
-            #return id1outliers.patch_outlier(entry)
+            return id1outliers.patch_outlier(entry)
             pass
         elif pp_id ==3:
             return id3outliers.patch_outlier(entry)
@@ -210,3 +213,10 @@ def filter_id(pp_id, entry, phase):
 tables.delete_table('data_entries_raw')
 tables.delete_table('data_entries_corrected')
 tables.delete_table('data_entries_corrected_duplicateless')
+logPerf("cleaned up database")
+
+
+#################### Generating SEF files ##########################
+print("Generating SEF files")
+sef_gen.generateSEFs()
+logPerf("SEF files generated")

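Because the new SEF step runs as the final statements of the script, right after the temporary tables are dropped, any exception inside generateSEFs() ends the run with a traceback. A defensive variant one might consider (a sketch, not what the commit does):

import sef_gen

try:
    sef_gen.generateSEFs()
    print("SEF files generated")
except Exception as exc:
    # the table cleanup above has already run; report the failure instead of aborting
    print("SEF generation failed: " + str(exc))
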
DRAW-post-processing/post_process_ids/id3/id_3_phase_2.py

Lines changed: 10 additions & 10 deletions
@@ -205,16 +205,16 @@ def flag_outliers (df, field_id):
     if outliers.size >0:
         if config.temperature_plot_outliers == True:
             ans_max=ans+config.temperature_outlier_std_factor*standard_deviation
-            ans_min=ans-config.temperature_outlier_std_factor*standard_deviation
-            plt.plot(x, y, '.', color ='black', label ="data")
-            plt.plot(x, ans, '--', color ='blue', label ="rolling mean")
-            plt.plot(x,ans_max,'-.', color='green')
-            plt.plot(x,ans_min,'-.', color='green')
-            plt.plot(outliers.observation_date, outliers.value,'o', color='red', label="Outliers")
-            plt.title("Field: " +str(field_id))
-            plt.legend()
-            plt.ion()
-            plt.show()
+            ans_min=ans-config.temperature_outlier_std_factor*standard_deviation
+            fig, ax = plt.subplots(1, figsize = (20, 8))
+            fig.autofmt_xdate()
+            ax.plot(x, y, '.', color ='black', label ="data")
+            ax.plot(x, ans, '--', color ='blue', label ="rolling mean")
+            ax.plot(x,ans_max,'-.', color='green', label=str(config.temperature_outlier_std_factor)+" sigma")
+            ax.plot(x,ans_min,'-.', color='green')
+            ax.plot(outliers.observation_date, outliers.value,'o', color='red', label="Outliers")
+            ax.set_title("Field: " +str(field_id)+" - " + df_partial.field_key.iloc[0])
+            ax.legend()
         #flag the outliers
         for ind,outlier in outliers.iterrows():
             df.at[ind,'flagged']=10

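The change above swaps the implicit pyplot calls for an explicit figure/axes pair, so the figure size can be set, date tick labels rotated with fig.autofmt_xdate(), and the sigma band given a label. A self-contained sketch of the same plotting pattern on synthetic data (the series and spike values are made up; only the fig/ax structure mirrors the diff):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# synthetic daily series with a few injected spikes, standing in for one field_id
rng = np.random.default_rng(0)
dates = pd.date_range("1890-01-01", periods=120, freq="D")
values = pd.Series(20 + rng.normal(0, 1, len(dates)), index=dates)
values.iloc[[15, 60, 100]] += 20

std_factor = 5                                   # plays the role of config.temperature_outlier_std_factor
rolling_mean = values.rolling(15, center=True, min_periods=1).mean()
sigma = values.std()
upper, lower = rolling_mean + std_factor * sigma, rolling_mean - std_factor * sigma
outliers = values[(values > upper) | (values < lower)]

fig, ax = plt.subplots(1, figsize=(20, 8))
fig.autofmt_xdate()                              # rotate the date tick labels
ax.plot(values.index, values, '.', color='black', label="data")
ax.plot(values.index, rolling_mean, '--', color='blue', label="rolling mean")
ax.plot(values.index, upper, '-.', color='green', label=str(std_factor) + " sigma")
ax.plot(values.index, lower, '-.', color='green')
ax.plot(outliers.index, outliers, 'o', color='red', label="Outliers")
ax.set_title("Field: demo")
ax.legend()
plt.show()
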
DRAW-post-processing/sef_gen.py

Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+import database_connection as db
+import datetime
+import config
+import os
+cursor = db.cursor
+
+def generateSEFs():
+    for sef_type in config.sef_type_to_field_id.keys():
+        generateSEF(sef_type)
+
+
+def correctValue(value):
+    if value == "empty" or value is None:
+        return "-999"
+    return value
+
+def correctObservationDate(sef_type,observation_date):
+    if getPeriod(sef_type)=="24":
+        observation_date.minute=0
+        observation_date.hour=0
+    elif config.sef_apply_utc==True:
+        observation_date=observation_date+datetime.timedelta(hours=config.sef_utc_offset)
+    return observation_date
+
+def getPeriod(sef_type):
+    if config.sef_type_to_start[sef_type]=="mean" or \
+       config.sef_type_to_start[sef_type]=="daily" or \
+       config.sef_type_to_start[sef_type]=="total":
+        return "24"
+    else:
+        return "0"
+
+
+def getFilename(sef_type,type_result_set):
+    filename=os.environ.get('DRAW_sef_folder')+os.sep+"McGill_DRAW_1491_"
+
+    if len(type_result_set)>0:
+        sorted_type_results=sorted(type_result_set)
+
+
+        # now we can complete the file name
+        #we want to remove the leading -999 and the trailing -999
+        #Get start
+        start_found=0
+        index_start=0
+
+        while start_found==0 and index_start<len(type_result_set):
+            entry_value=sorted_type_results[index_start].split("\t")[6]
+            if entry_value=="-999":
+                index_start+=1
+            else:
+                start_found=1
+        #Get end
+        end_found=0
+        index_end=-1
+
+        while end_found==0 and index_end > -len(type_result_set):
+            entry_value=sorted_type_results[index_end].split("\t")[6]
+            if entry_value=="-999":
+
+                index_end-=1
+            else:
+                end_found=1
+
+        if index_start<len(type_result_set):
+            startStr=sorted_type_results[index_start].split("\t")
+            filename=filename+startStr[0]+"-"+startStr[1]+"_"
+            endStr=sorted_type_results[index_end].split("\t")
+            filename=filename+endStr[0]+"-"+endStr[1]+"-"+sef_type+".tsv"
+            return (filename,index_start,index_end)
+        else:
+            return (None, None, None)
+
+
+
+
+def generateSEF(sef_type):
+    print("Generating SEF for type: " + sef_type)
+    if type(config.sef_type_to_field_id[sef_type]) == int:
+        query="select value,observation_date from data_entries_corrected_final_iso where field_id = {} order by observation_date asc".format(config.sef_type_to_field_id[sef_type])
+    else:
+        query="select value,observation_date from data_entries_corrected_final_iso where field_id in {} order by observation_date asc".format(config.sef_type_to_field_id[sef_type])
+    cursor.execute(query)
+    results=cursor.fetchall()
+    type_result_set=[]
+    for result in results:
+        (value, observation_date)=result
+        value=correctValue(value)
+        observation_date=correctObservationDate(sef_type, observation_date)
+        try:
+            result_str=str(observation_date.year)+"\t"+str(observation_date.month)+"\t"+str(observation_date.day)+\
+                "\t"+str(observation_date.hour)+"\t"+str(observation_date.minute)+"\t"+getPeriod(sef_type)+\
+                "\t"+value+"\t|\t\n"
+            type_result_set.append(result_str)
+        except:
+            print ("Couldn't generate SEF line for value="+str(value)+", observation date ="+str(observation_date))


+    (filename,index_start,index_end)=getFilename(sef_type, type_result_set)
+    if filename is not None:
+        f=open(filename,"w")
+        f.write ("SEF\t1.0.0\n")
+        f.write ("ID\tID\n")
+        f.write ("Name\t1491\n")
+        f.write ("Lat\t45.5\n")
+        f.write ("Lon\t-73.59\n")
+        f.write ("Alt\t49\n")
+        f.write ("Source\McGill\n")
+        f.write ("Link\thttps://draw.geog.mcgill.ca/\n")
+        f.write ("Vbl\t" + config.sef_type_to_unit[sef_type]+"\n")
+        f.write ("Stat\t")
+        if "mean" in config.sef_type_to_start[sef_type]:
+            f.write("mean\n")
+        else:
+            f.write("point\n")
+        f.write ("Unit\t" + config.sef_type_to_unit[sef_type]+"\n")
+        f.write("Meta\t")
+        f.write("UTCOffset=")
+        if (config.sef_apply_utc==True):
+            f.write("Applied\tUTCOffset="+str(config.sef_utc_offset))
+        else:
+            f.write("NO")
+        f.write("\n")
+        f.write ("Year\tMonth\tDay\tHour\tMinute\tPeriod\tValue\t|\tMeta\n")
+        index=0
+        sorted_type_results=sorted(type_result_set)
+        for res in sorted_type_results:
+            if index>=index_start and index<=index_end+len(sorted_type_results):
+                f.write(res)
+            index+=1
+
+
+
+

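generateSEF writes a tab-separated header block (SEF version, station metadata, Stat, Unit, Meta) followed by a Year/Month/Day/Hour/Minute/Period/Value table in which missing values are written as -999. A rough reader for such a file, written against the format produced above rather than any official SEF tooling; the filename is a placeholder and the float conversion assumes a numeric variable such as temperature:

import csv

def read_sef(path):
    header, rows = {}, []
    with open(path, newline="") as f:
        in_data = False
        for record in csv.reader(f, delimiter="\t"):
            if not record:
                continue
            if in_data:
                # Year, Month, Day, Hour, Minute, Period, Value, '|', Meta
                year, month, day, hour, minute, period, value = record[:7]
                if value != "-999":              # skip the missing-value placeholder
                    rows.append((int(year), int(month), int(day), int(hour), int(minute), period, float(value)))
            elif record[0] == "Year":
                in_data = True                   # column-header row reached, data rows follow
            else:
                header[record[0]] = record[1] if len(record) > 1 else ""
    return header, rows

header, rows = read_sef("McGill_DRAW_1491_1895-1_1897-12-ta.tsv")  # placeholder filename
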
DRAW-post-processing/sql_commands.py

Lines changed: 2 additions & 2 deletions
@@ -108,8 +108,8 @@ def check_2_command(entry, counter):
 def ref_adjacent_fluctuations(entry, obs_datetime):
     field_id = entry[4]
     return phase_1_data_sql[:len(phase_1_data_sql) - 1] + " WHERE field_id = {} " \
-        "AND observation_date LIKE '{}%';".format(field_id, str(obs_datetime)[:10])
-
+        "AND date(observation_date)='{}' order by observation_date asc;".format(field_id, str(obs_datetime)[:10])
+

 # retrieves relevant field_id's in ledger sheet, to calculate particular field_id based on other two elements, using equation 1, 2 oe 3 (PHASE 2)
 def equation_retrieve_row(entry, equation_num):
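
The rewritten clause filters on date(observation_date) instead of a LIKE prefix match and adds an explicit ordering. With a hypothetical phase_1_data_sql of "SELECT * FROM data_entries_corrected;" (the real constant is defined elsewhere in sql_commands.py and is not shown in this diff), the string building would produce:

phase_1_data_sql = "SELECT * FROM data_entries_corrected;"   # hypothetical stand-in
field_id, obs_datetime = 7, "1890-01-15 07:00:00"
query = phase_1_data_sql[:len(phase_1_data_sql) - 1] + " WHERE field_id = {} " \
    "AND date(observation_date)='{}' order by observation_date asc;".format(field_id, str(obs_datetime)[:10])
# SELECT * FROM data_entries_corrected WHERE field_id = 7 AND date(observation_date)='1890-01-15' order by observation_date asc;
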
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+import sef_gen
+
+sef_gen.generateSEFs()

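Running this standalone script relies on the same environment as the full pipeline: the three DRAW_local_db_* variables consumed by database_connection.py plus DRAW_sef_folder, which getFilename() concatenates into every output path (when it is unset, the None + os.sep concatenation raises a TypeError). A minimal wrapper, sketched rather than taken from the repository:

import os

# DRAW_sef_folder must point at an existing output directory before sef_gen is used
sef_folder = os.environ.get("DRAW_sef_folder")
if not sef_folder or not os.path.isdir(sef_folder):
    raise RuntimeError("Set DRAW_sef_folder to an existing directory before generating SEF files")

import sef_gen
sef_gen.generateSEFs()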