Skip to content

Commit 879099a

Browse files
authored
Merge pull request #112 from NOAA-GFDL/StrictModeTesting
2 parents a82935f + 7511c4e commit 879099a

File tree

10 files changed

+341
-265
lines changed

10 files changed

+341
-265
lines changed

.github/workflows/conda-env-create-run-pytest.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,5 +61,5 @@ jobs:
6161
- name: Test for completeness
6262
run: |
6363
which python
64-
python catalogbuilder/scripts/test_catalog.py -tf gfdl_autotest.json catalogbuilder/cats/gfdl_template.json
65-
python catalogbuilder/scripts/test_catalog.py -tf catalogbuilder/cats/gfdl_autotest_from_yaml.json
64+
python catalogbuilder/tests/compval.py --proper_generation -tf gfdl_autotest.json catalogbuilder/cats/gfdl_template.json
65+
python catalogbuilder/tests/compval.py --proper_generation -tf catalogbuilder/cats/gfdl_autotest_from_yaml.json

catalogbuilder/intakebuilder/CSVwriter.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ def listdict_to_csv(dict_info,headerlist, csvfile, overwrite, append,slow):
5353
if os.path.isfile(csvfile):
5454
user_input = ''
5555
while True:
56-
user_input = input('Found existing file! Overwrite? (y/n)')
56+
user_input = input('\nFound existing file! Overwrite? (y/n)\n')
5757

5858
if user_input.lower() == 'y':
5959
with open(csvfile, 'w') as csvfile:

catalogbuilder/scripts/gen_intake_gfdl.py

Lines changed: 90 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -2,42 +2,49 @@
22

33
import json
44
import sys,pandas as pd
5+
import time
56
import click
67
import os
78
from pathlib import Path
89
import logging
10+
from catalogbuilder.tests.compval import compval as cv
911

1012
logger = logging.getLogger('local')
1113
logger.setLevel(logging.INFO)
1214
logging.basicConfig(stream=sys.stdout)
1315

1416
try:
15-
from catalogbuilder.intakebuilder import gfdlcrawler, CSVwriter, configparser, getinfo
17+
from catalogbuilder.intakebuilder import gfdlcrawler, CSVwriter, configparser, getinfo
1618
except ModuleNotFoundError:
17-
print("The module intakebuilder is not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ")
18-
print("Attempting again with adjusted sys.path ")
19+
logger.warning("The module intakebuilder is not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ")
20+
logger.warning("Attempting again with adjusted sys.path ")
1921
try:
20-
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
22+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
2123
except:
22-
print("Unable to adjust sys.path")
24+
logger.error("Unable to adjust sys.path")
2325
#print(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
2426
try:
25-
from intakebuilder import gfdlcrawler, CSVwriter, configparser,getinfo
26-
print(gfdlcrawler.__file__)
27+
28+
from intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser,getinfo
29+
logger.info(gfdlcrawler.__file__)
30+
2731
except ModuleNotFoundError:
28-
sys.exit("The module 'intakebuilder' is still not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ")
32+
logger.error("The module 'intakebuilder' is still not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it?")
33+
raise ImportError("The module 'intakebuilder' is still not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it?")
2934

3035
package_dir = os.path.dirname(os.path.abspath(__file__))
3136
#template_path = os.path.join(package_dir, '../cats/gfdl_template.json')
3237

3338
def create_catalog(input_path=None, output_path=None, config=None, filter_realm=None, filter_freq=None, filter_chunk=None,
34-
overwrite=False, append=False, slow = False, verbose=False):
39+
overwrite=False, append=False, slow = False, strict = False, verbose=False):
3540
if verbose:
36-
logger.setLevel(logging.DEBUG)
37-
logger.info("Verbose log activated.")
41+
logger.setLevel(logging.DEBUG)
42+
logger.info("Verbose log activated.\n")
3843
else:
39-
logger.setLevel(logging.INFO)
40-
logger.info("[Mostly] silent log activated")
44+
logger.info("[Mostly] silent log activated\n")
45+
if strict:
46+
logger.warning("!!!!! STRICT MODE IS ACTIVE. CATALOG GENERATION WILL FAIL IF ERRORS ARE FOUND !!!!!\n")
47+
time.sleep(10)
4148
configyaml = None
4249
if (config is not None):
4350
configyaml = configparser.Config(config,logger)
@@ -48,38 +55,41 @@ def create_catalog(input_path=None, output_path=None, config=None, filter_realm=
4855
else:
4956
# If user does not pass a config, we will use the default config with the same format to avoid special cases
5057
#
51-
try:
52-
pkg = importlib_resources.files("catalogbuilder.scripts")
53-
config = pkg / "configs" / "config.yaml"
54-
logger.info("Default config path activated from package resources configs/config.yaml")
55-
except:
56-
try:
57-
config = os.path.join(package_dir, 'configs/config_default.yaml')
58-
logger.info("Default config path activated from path configs/config_default.yaml")
59-
except:
60-
sys.exit("Can't locate or read config, check --config ")
61-
configyaml = configparser.Config(config,logger)
62-
if(input_path is None):
58+
try:
59+
pkg = importlib_resources.files("catalogbuilder.scripts")
60+
config = pkg / "configs" / "config.yaml"
61+
logger.info("Default config path activated from package resources configs/config.yaml")
62+
except:
63+
try:
64+
config = os.path.join(package_dir, 'configs/config_default.yaml')
65+
logger.info("Default config path activated from path configs/config_default.yaml")
66+
except:
67+
raise FileNotFoundError("Can't locate or read config, check --config ")
68+
configyaml = configparser.Config(config,logger)
69+
if(input_path is None):
6370
input_path = configyaml.input_path
64-
if(output_path is None):
71+
if(output_path is None):
6572
output_path = configyaml.output_path
6673
if((input_path is None) or (output_path is None)):
67-
sys.exit("Missing: input_path or output_path. Pass it in the config yaml or as command-line option")
74+
logger.error("Missing: input_path or output_path. Pass it in the config yaml or as command-line option")
75+
raise TypeError("Missing: input_path or output_path. Pass it in the config yaml or as command-line option")
6876
if config is None or not configyaml.schema:
69-
logger.info("Default schema: catalogbuilder/cats/gfdl_template.json")
70-
template_path = os.path.join(package_dir, '../cats/gfdl_template.json')
77+
logger.info("Default schema: catalogbuilder/cats/gfdl_template.json")
78+
template_path = os.path.join(package_dir, '../cats/gfdl_template.json')
7179
else:
72-
template_path = configyaml.schema
73-
print("Using schema from config file", template_path)
80+
template_path = configyaml.schema
81+
logger.info("Using schema from config file", template_path)
7482
if not os.path.exists(input_path):
75-
sys.exit("Input path does not exist. Adjust configuration.")
83+
logger.error("Input path does not exist. Adjust configuration.")
84+
raise FileNotFoundError("Input path does not exist. Adjust configuration.")
7685
if not os.path.exists(Path(output_path).parent.absolute()):
77-
sys.exit("Output path parent directory does not exist. Adjust configuration.")
86+
logger.error("Output path parent directory does not exist. Adjust configuration.")
87+
raise ValueError("Output path parent directory does not exist. Adjust configuration.")
7888
logger.info("input path: "+ input_path)
79-
logger.info( " output path: "+ output_path)
89+
logger.info("output path: "+ output_path)
8090
project_dir = input_path
8191
csv_path = "{0}.csv".format(output_path)
82-
json_path = "{0}.json".format(output_path)
92+
json_path = "{0}.json".format(output_path)
8393

8494
######### SEARCH FILTERS ###########################
8595

@@ -120,40 +130,49 @@ def create_catalog(input_path=None, output_path=None, config=None, filter_realm=
120130
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
121131
CSVwriter.listdict_to_csv(list_files, headers, csv_path, overwrite, append,slow)
122132
df = None
123-
if(slow == False) & ('standard_name' in headers ):
124-
#If we badly need standard name, we use gfdl cmip mapping tables especially when one does not prefer the slow option. Useful for MDTF runs
125-
df = pd.read_csv(os.path.abspath(csv_path), sep=",", header=0,index_col=False)
126-
list_variable_id = []
127-
try:
128-
list_variable_id = df["variable_id"].unique().tolist()
129-
except:
130-
print("Having trouble finding 'variable_id'... Be sure to add it to the output_path_template field of your configuration")
131-
try:
132-
list_realm = df["realm"].unique().tolist()
133-
except:
134-
print("Having trouble finding 'realm'... Be sure to add it to the output_path_template field of your configuration")
135-
dictVarCF = getinfo.getStandardName(list_variable_id,list_realm)
136-
#print("standard name from look-up table-", dictVarCF)
137-
for k, v in dictVarCF.items():
138-
try:
139-
var = k.split(",")[0]
140-
except ValueError:
141-
continue
142-
try:
143-
realm = k.split(",")[1]
144-
except ValueError:
145-
continue
146-
if(var is not None) & (realm is not None):
147-
df['standard_name'].loc[(df['variable_id'] == var) & (df['realm'] == realm) ] = v
148-
#df['standard_name'].loc[(df['variable_id'] == k)] = v
149-
if(slow == False) & ('standard_name' in headers):
150-
if ((df is not None) & (len(df) != 0) ):
151-
with open(csv_path, 'w') as csvfile:
152-
df.to_csv(csvfile,index=False)
153-
154-
print("JSON generated at:", os.path.abspath(json_path))
155-
print("CSV generated at:", os.path.abspath(csv_path))
156-
logger.info("CSV generated at" + os.path.abspath(csv_path))
133+
134+
if not slow and 'standard_name' in headers:
135+
#If we badly need standard name, we use gfdl cmip mapping tables especially when one does not prefer the slow option. Useful for MDTF runs
136+
df = pd.read_csv(os.path.abspath(csv_path), sep=",", header=0,index_col=False)
137+
list_variable_id = []
138+
try:
139+
list_variable_id = df["variable_id"].unique().tolist()
140+
except:
141+
raise KeyError("Having trouble finding 'variable_id'... Be sure to add it to the output_path_template field of your configuration")
142+
try:
143+
list_realm = df["realm"].unique().tolist()
144+
except:
145+
raise KeyError("Having trouble finding 'realm'... Be sure to add it to the output_path_template field of your configuration")
146+
dictVarCF = getinfo.getStandardName(list_variable_id,list_realm)
147+
#print("standard name from look-up table-", dictVarCF)
148+
for k, v in dictVarCF.items():
149+
try:
150+
var = k.split(",")[0]
151+
except ValueError:
152+
continue
153+
try:
154+
realm = k.split(",")[1]
155+
except ValueError:
156+
continue
157+
if(var is not None) & (realm is not None):
158+
df['standard_name'].loc[(df['variable_id'] == var) & (df['realm'] == realm) ] = v
159+
#df['standard_name'].loc[(df['variable_id'] == k)] = v
160+
161+
if ((df is not None) & (len(df) != 0) ):
162+
with open(csv_path, 'w') as csvfile:
163+
df.to_csv(csvfile,index=False)
164+
165+
# Strict Mode
166+
if strict:
167+
vocab = True
168+
proper_generation = False
169+
test_failure = False
170+
171+
#Validate
172+
cv(json_path,'',vocab, proper_generation, test_failure)
173+
174+
logger.info("JSON generated at: " + os.path.abspath(json_path))
175+
logger.info("CSV generated at: " + os.path.abspath(csv_path))
157176
return(csv_path,json_path)
158177

159178
#Setting up argument parsing/flags
@@ -170,10 +189,11 @@ def create_catalog(input_path=None, output_path=None, config=None, filter_realm=
170189
@click.option('--overwrite', is_flag=True, default=False)
171190
@click.option('--append', is_flag=True, default=False)
172191
@click.option('--slow','-s', is_flag=True, default=False, help='This option looks up standard names in netcdf file to fill up the standard name column if its present in the header specs. If standard_name is absent, long_name with space replaced by underscore is utilized')
192+
@click.option('--strict', is_flag=True, default=False, help='Strict catalog generation ensures catalogs are compliant with CV standards (as defined in vocabulary section of catalog schema)')
173193
@click.option('--verbose/--silent', default=False, is_flag=True) #default has silent option. Use --verbose for detailed logging
174194

175195
def create_catalog_cli(**kwargs):
176196
return create_catalog(**kwargs)
177-
197+
178198
if __name__ == '__main__':
179199
create_catalog_cli()

catalogbuilder/scripts/test_catalog.py

Lines changed: 0 additions & 70 deletions
This file was deleted.

0 commit comments

Comments
 (0)