Skip to content

Commit c509945

Browse files
authored
Merge pull request #347 from ATOMScience-org/1.6.2
1.6.2
2 parents 1d88757 + 439b2e4 commit c509945

File tree

12 files changed

+147
-104
lines changed

12 files changed

+147
-104
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@ An open-source, end-to-end software pipeline for data curation, model building,
1010
<img src="atomsci/ddm/docs/ATOM_cymatics_black_wordmark.jpg" width="370" height="100" class="center"></img>
1111

1212

13-
![Static Badge](https://img.shields.io/badge/Announcement-1.6.1-blue)![Static Badge](https://img.shields.io/badge/Announcement-1.6.1-blue)![Static Badge](https://img.shields.io/badge/Announcement-1.6.1-blue)![Static Badge](https://img.shields.io/badge/Announcement-1.6.1-blue)![Static Badge](https://img.shields.io/badge/Announcement-1.6.1-blue)![Static Badge](https://img.shields.io/badge/Announcement-1.6.1-blue)
13+
The ATOM Modeling PipeLine (AMPL) extends the functionality of DeepChem and supports an array of machine learning and molecular featurization tools to predict key potency, safety and pharmacokinetic-relevant parameters. AMPL has been benchmarked on a large collection of pharmaceutical datasets covering a wide range of parameters. This is a living software project with active development. Check back for continued updates. Feedback is welcomed and appreciated, and the project is open to contributions! An [article describing the AMPL project](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.9b01053) was published in JCIM. For those without access to JCIM, a preprint of the article is available on [ArXiv](http://arxiv.org/abs/1911.05211). [Documentation is available here.](https://ampl.readthedocs.io/en/latest/pipeline.html)
1414

15-
## Check out our new tutorial series that walks through AMPL's end-to-end modeling pipeline to build a machine learning model! View them in our [docs](https://ampl.readthedocs.io/en/latest/) or as Jupyter notebooks in our [repo](https://github.com/ATOMScience-org/AMPL/tree/master/atomsci/ddm/examples/tutorials).
1615

17-
The ATOM Modeling PipeLine (AMPL) extends the functionality of DeepChem and supports an array of machine learning and molecular featurization tools to predict key potency, safety and pharmacokinetic-relevant parameters. AMPL has been benchmarked on a large collection of pharmaceutical datasets covering a wide range of parameters. This is a living software project with active development. Check back for continued updates. Feedback is welcomed and appreciated, and the project is open to contributions! An [article describing the AMPL project](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.9b01053) was published in JCIM. For those without access to JCIM, a preprint of the article is available on [ArXiv](http://arxiv.org/abs/1911.05211). [Documentation is available here.](https://ampl.readthedocs.io/en/latest/pipeline.html)
16+
![Static Badge](https://img.shields.io/badge/Announcement-1.6.1-blue)
1817

18+
Check out our new tutorial series that walks through AMPL's end-to-end modeling pipeline to build a machine learning model! View them in our [docs](https://ampl.readthedocs.io/en/latest/) or as Jupyter notebooks in our [repo](https://github.com/ATOMScience-org/AMPL/tree/master/atomsci/ddm/examples/tutorials).
1919

2020
---
2121
## Table of contents

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.6.1
1+
1.6.2

atomsci/ddm/pipeline/model_pipeline.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -728,9 +728,11 @@ def predict_on_smiles(self, smiles, verbose=False, AD_method=None, k=5, dist_met
728728
the featurizer may not be able to featurize all of them.
729729
"""
730730

731+
logger = logging.getLogger('ATOM')
732+
orig_log_level = logger.getEffectiveLevel()
733+
logger.setLevel(orig_log_level)
731734
if not verbose:
732735
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
733-
logger = logging.getLogger('ATOM')
734736
logger.setLevel(logging.CRITICAL)
735737
sys.stdout = io.StringIO()
736738
import warnings
@@ -747,6 +749,7 @@ def predict_on_smiles(self, smiles, verbose=False, AD_method=None, k=5, dist_met
747749
res = self.predict_on_dataframe(df, AD_method=AD_method, k=k, dist_metric=dist_metric)
748750

749751
sys.stdout = sys.__stdout__
752+
logger.setLevel(orig_log_level)
750753

751754
return res
752755

@@ -1258,6 +1261,7 @@ def create_prediction_pipeline(params, model_uuid, collection_name=None, featuri
12581261
pipeline.model_wrapper = model_wrapper.create_model_wrapper(pipeline.params, featurization,
12591262
pipeline.ds_client)
12601263

1264+
orig_log_level = pipeline.log.getEffectiveLevel()
12611265
if params.verbose:
12621266
pipeline.log.setLevel(logging.DEBUG)
12631267
else:
@@ -1275,6 +1279,7 @@ def create_prediction_pipeline(params, model_uuid, collection_name=None, featuri
12751279
# Reload the saved model training state
12761280
pipeline.model_wrapper.reload_model(pipeline.model_wrapper.model_dir)
12771281

1282+
pipeline.log.setLevel(orig_log_level)
12781283
return pipeline
12791284

12801285

@@ -1371,6 +1376,7 @@ def create_prediction_pipeline_from_file(params, reload_dir, model_path=None, mo
13711376
# Create the ModelWrapper object.
13721377
pipeline.model_wrapper = model_wrapper.create_model_wrapper(pipeline.params, featurization)
13731378

1379+
orig_log_level = pipeline.log.getEffectiveLevel()
13741380
if verbose:
13751381
pipeline.log.setLevel(logging.DEBUG)
13761382
else:
@@ -1382,6 +1388,7 @@ def create_prediction_pipeline_from_file(params, reload_dir, model_path=None, mo
13821388
# If that worked, reload the saved model training state
13831389
pipeline.model_wrapper.reload_model(model_dir)
13841390

1391+
pipeline.log.setLevel(orig_log_level)
13851392
return pipeline
13861393

13871394

atomsci/ddm/pipeline/model_wrapper.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,23 +1072,39 @@ def generate_predictions(self, dataset):
10721072
self.log.warning("Warning: DeepChem neural net models support uncertainty for regression only.")
10731073

10741074
if self.params.uncertainty and self.params.prediction_type == 'regression':
1075-
# For multitask, predict_uncertainty returns a list of (pred, std) tuples, one for each task.
1076-
# For singletask, it returns one tuple. Convert the result into a pair of ndarrays of shape (ncmpds, ntasks, nclasses).
1075+
# For the models we use, predict_uncertainty returns a tuple (not a list of tuples) for both singletask and multitask.
1076+
# A list is only returned if we request multiple *outputs* (e.g., predictions and embeddings), which are not the same thing as tasks.
1077+
1078+
# Fully connected NN models return predictions and uncertainties as arrays with shape (num_cmpds, num_tasks, num_classes), with
1079+
# num_classes = 1 for regression models. GraphConv regression models omit the num_classes dimension.
10771080
pred_std = self.model.predict_uncertainty(dataset)
10781081
if type(pred_std) == tuple:
1079-
#JEA
1080-
#ntasks = 1
1081-
ntasks = len(pred_std[0][0])
10821082
pred, std = pred_std
1083-
pred = pred.reshape((pred.shape[0], 1, pred.shape[1]))
1083+
ncmpds = pred.shape[0]
1084+
ntasks = pred.shape[1]
1085+
if len(pred.shape) > 2:
1086+
nclasses = pred.shape[2]
1087+
else:
1088+
nclasses = 1
1089+
self.log.debug(f"generate_predictions: input pred shape = {pred.shape}, std shape = {std.shape}")
1090+
# Reshape to 3 dimensions for consistency
1091+
pred = pred.reshape((ncmpds, ntasks, nclasses))
10841092
std = std.reshape(pred.shape)
1093+
self.log.debug(f"After reshaping: pred shape = {pred.shape}")
10851094
else:
1086-
ntasks = len(pred_std)
1087-
pred0, std0 = pred_std[0]
1088-
ncmpds = pred0.shape[0]
1089-
nclasses = pred0.shape[1]
1090-
pred = np.concatenate([p.reshape((ncmpds, 1, nclasses)) for p, s in pred_std], axis=1)
1091-
std = np.concatenate([s.reshape((ncmpds, 1, nclasses)) for p, s in pred_std], axis=1)
1095+
# ksm: I don't think this code block will ever get run, but just in case, take the first output
1096+
# as the actual predictions
1097+
num_outputs = len(pred_std)
1098+
pred, std = pred_std[0]
1099+
ncmpds = pred.shape[0]
1100+
ntasks = pred.shape[1]
1101+
if len(pred.shape) > 2:
1102+
nclasses = pred.shape[2]
1103+
else:
1104+
nclasses = 1
1105+
self.log.info(f"generate_predictions returned {num_outputs} outputs: ntasks={ntasks}, ncmpds={ncmpds}, nclasses={nclasses}")
1106+
pred = pred.reshape((ncmpds, ntasks, nclasses))
1107+
std = std.reshape(pred.shape)
10921108

10931109
if self.params.transformers and self.transformers is not None:
10941110
# Transform the standard deviations, if we can. This is a bit of a hack, but it works for
@@ -1103,13 +1119,15 @@ def generate_predictions(self, dataset):
11031119
std = std / y_stds
11041120
pred = dc.trans.undo_transforms(pred, self.transformers)
11051121
else:
1122+
# Classification models and regression models without uncertainty are handled here
11061123
txform = [] if (not self.params.transformers or self.transformers is None) else self.transformers
11071124
pred = self.model.predict(dataset, txform)
11081125
if self.params.prediction_type == 'regression':
11091126
if type(pred) == list and len(pred) == 0:
11101127
# DeepChem models return empty list if no valid predictions
11111128
pred = np.array([]).reshape((0,0,1))
11121129
else:
1130+
# Reshape graphconv predictions to 3D array; others already have this shape
11131131
pred = pred.reshape((pred.shape[0], pred.shape[1], 1))
11141132
return pred, std
11151133

0 commit comments

Comments
 (0)