Skip to content

Commit b89515e

Browse files
authored
Merge pull request #401 from ATOMScience-org/1.7.0
1.7.0
2 parents d271984 + be3a7d6 commit b89515e

File tree

171 files changed

+22024
-1525
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

171 files changed

+22024
-1525
lines changed

.github/workflows/pytest.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
name: tests
22

3-
on: [push, pull_request]
3+
on:
4+
push:
5+
branches: ["**"]
6+
pull_request:
7+
types:
8+
- opened
9+
- reopened
10+
- ready_for_review
411

512
jobs:
613
pytest-unit:
@@ -40,6 +47,7 @@ jobs:
4047
- name: pytest
4148
run: |
4249
# python3.9 -m pytest --capture=sys --capture=fd --cov=atomsci/ -vv atomsci/ddm/test/unit
50+
python3.9 -m pytest --capture=sys --capture=fd --cov=atomsci/ -vv atomsci/modac/test/unit
4351
cd atomsci/ddm/test/unit && python3.9 -m pytest -n 2 --capture=sys --capture=fd --cov=atomsci -vv
4452
env:
4553
ENV: test

Makefile

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ endif
2424
# Release version
2525
VERSION=$(shell cat VERSION)
2626

27-
# If ENV is prod, we use VERSION for the tag, otherwise use PLATFORM
27+
# If ENV is prod, we use VERSION (plus SUBTAG) for the tag; otherwise we use ENV and PLATFORM
2828
ifeq ($(ENV), prod)
2929
TAG = v$(VERSION)-$(SUBTAG)
3030
else
31-
TAG = $(PLATFORM)-$(ENV)
31+
TAG = $(ENV)-$(PLATFORM)
3232
endif
3333

3434
# IMAGE REPOSITORY
@@ -51,7 +51,7 @@ WORK_DIR ?= work
5151

5252
# Load Docker image
5353
load-docker:
54-
docker load < ampl-$(PLATFORM)-$(ENV).tar.gz
54+
docker load < ampl-$(TAG).tar.gz
5555

5656
# Pull Docker image
5757
pull-docker:
@@ -63,12 +63,12 @@ push-docker:
6363

6464
# Save Docker image
6565
save-docker:
66-
docker save $(IMAGE_REPO):$(PLATFORM)-$(ENV) | gzip > ampl-$(PLATFORM)-$(ENV).tar.gz
66+
docker save $(IMAGE_REPO):$(TAG) | gzip > ampl-$(TAG).tar.gz
6767

6868
# Build Docker image
6969
build-docker:
7070
@echo "Building Docker image for $(PLATFORM)"
71-
docker buildx build -t $(IMAGE_REPO):$(PLATFORM)-$(ENV) --build-arg ENV=$(ENV) $(PLATFORM_ARG) --load -f Dockerfile.$(PLATFORM) .
71+
docker buildx build -t $(IMAGE_REPO):$(TAG) --build-arg ENV=$(ENV) $(PLATFORM_ARG) --load -f Dockerfile.$(PLATFORM) .
7272

7373
install: install-system
7474

@@ -95,12 +95,12 @@ ifdef host
9595
$(GPU_ARG) \
9696
--hostname $(host) \
9797
--privileged \
98-
-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(PLATFORM)-$(ENV) \
98+
-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(TAG) \
9999
/bin/bash -l -c "jupyter-notebook --ip=0.0.0.0 --no-browser --allow-root --port=$(JUPYTER_PORT)"
100100
else
101101
docker run -p $(JUPYTER_PORT):$(JUPYTER_PORT) \
102102
$(GPU_ARG) \
103-
-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(PLATFORM)-$(ENV) \
103+
-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(TAG) \
104104
/bin/bash -l -c "jupyter-notebook --ip=0.0.0.0 --no-browser --allow-root --port=$(JUPYTER_PORT)"
105105
endif
106106

@@ -109,34 +109,34 @@ endif
109109
jupyter-lab:
110110
@echo "Starting Jupyter Lab"
111111
docker run -p $(JUPYTER_PORT):$(JUPYTER_PORT) \
112-
-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(PLATFORM)-$(ENV) \
112+
-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(TAG) \
113113
/bin/bash -l -c "jupyter-lab --ip=0.0.0.0 --allow-root --port=$(JUPYTER_PORT)"
114114

115115
# Run pytest
116116
pytest: pytest-unit pytest-integrative
117117

118118
pytest-integrative:
119119
@echo "Running integrative tests"
120-
docker run -v $(shell pwd)/$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(PLATFORM)-$(ENV) \
120+
docker run -v $(shell pwd)/$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(TAG) \
121121
/bin/bash -l -c "cd atomsci/ddm/test/integrative && ./integrative_batch_tests.sh"
122122

123123
pytest-unit:
124124
@echo "Running unit tests"
125125
docker run -v $(shell pwd)/$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(TAG) \
126-
/bin/bash -l -c "cd atomsci/ddm/test/unit && python3.9 -m pytest --capture=sys --capture=fd --cov=atomsci -vv"
126+
/bin/bash -l -c "cd atomsci/ddm/test/unit && python3.9 -m pytest --capture=sys --capture=fd --cov=atomsci -vv"
127127

128128
# Run ruff linter
129129
ruff:
130130
@echo "Running ruff"
131-
docker run -it $(IMAGE_REPO):$(PLATFORM)-$(ENV) /bin/bash -l -c "ruff check ."
131+
docker run -it $(IMAGE_REPO):$(TAG) /bin/bash -l -c "ruff check ."
132132

133133
# Run ruff linter with fix
134134
ruff-fix:
135135
@echo "Running ruff with fix"
136-
docker run -it $(IMAGE_REPO):$(PLATFORM)-$(ENV) /bin/bash -l -c "ruff check . --fix"
136+
docker run -it $(IMAGE_REPO):$(TAG) /bin/bash -l -c "ruff check . --fix"
137137

138138
shell:
139-
docker run -v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) -it $(IMAGE_REPO):$(PLATFORM)-$(ENV) /bin/bash
139+
docker run -v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) -it $(IMAGE_REPO):$(TAG) /bin/bash
140140

141141
# Setup virtual environment and install dependencies
142142
setup:

README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@ An open-source, end-to-end software pipeline for data curation, model building,
1515

1616
The ATOM Modeling PipeLine (AMPL) extends the functionality of DeepChem and supports an array of machine learning and molecular featurization tools to predict key potency, safety and pharmacokinetic-relevant parameters. AMPL has been benchmarked on a large collection of pharmaceutical datasets covering a wide range of parameters. This is a living software project with active development. Check back for continued updates. Feedback is welcomed and appreciated, and the project is open to contributions! An [article describing the AMPL project](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.9b01053) was published in JCIM. The AMPL pipeline documentation is available [here](https://ampl.readthedocs.io/en/latest/pipeline.html).
1717

18+
Check out our new tutorial series that walks through AMPL's end-to-end modeling pipeline to build a machine learning model! View them in our [docs](https://ampl.readthedocs.io/en/latest/) or as Jupyter notebooks in our [repo](https://github.com/ATOMScience-org/AMPL/tree/master/atomsci/ddm/examples/tutorials).
1819

19-
![Static Badge](https://img.shields.io/badge/Announcement-1.6.1-blue)
20+
![Static Badge](https://img.shields.io/badge/Announcement-1.7.0-blue)
2021

21-
Check out our new tutorial series that walks through AMPL's end-to-end modeling pipeline to build a machine learning model! View them in our [docs](https://ampl.readthedocs.io/en/latest/) or as Jupyter notebooks in our [repo](https://github.com/ATOMScience-org/AMPL/tree/master/atomsci/ddm/examples/tutorials).
22+
In addition to our written tutorials, we now provide a series of video tutorials on our YouTube channel, [ATOMScience-org](https://www.youtube.com/channel/UCOF6zZ7ltGwopYCoOGIFM-w). These videos are created to assist users in exploring and leveraging AMPL's robust capabilities.
2223

2324
---
2425
## Table of contents
@@ -113,6 +114,12 @@ export PYTORCH_HIP_ALLOC_CONF=garbage_collection_threshold:0.9,max_split_size_mb
113114
export TF_FORCE_GPU_ALLOW_GROWTH=true
114115
```
115116

117+
- Install pytest and plotting packages for development and testing.
118+
119+
```bash
120+
cd AMPL/pip
121+
pip install -r dev_requirements.txt
122+
```
116123
#### 6. *(Optional) LLNL LC only*: if you use [model_tracker](https://ampl.readthedocs.io/en/latest/pipeline.html#module-pipeline.model_tracker), install atomsci.clients
117124
```bash
118125
# LLNL only: required for ATOM model_tracker
@@ -145,6 +152,7 @@ cd AMPL/pip
145152
# If using CUDA:
146153
# module load cuda/11.8
147154
pip install -r cpu_requirements.txt # install cpu_requirements.txt OR cuda_requirements.txt
155+
pip install -r dev_requirements.txt # install pytest and plotting packages
148156

149157
# LLNL only: required for ATOM model_tracker
150158
# pip install -r clients_requirements.txt

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.6.3
1+
1.7.0

atomsci/ddm/docs/PARAMETERS.md

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,14 @@ The AMPL pipeline contains many parameters and options to fit models and make pr
276276
|*Description:*|True/False flag for setting verbosity|
277277
|*Default:*|FALSE|
278278
|*Type:*|Bool|
279+
280+
- **seed**
281+
282+
|||
283+
|-|-|
284+
|*Description:*|Seed used for initializing a random number generator to ensure results are reproducible. If None (the default), a random seed will be generated.|
285+
|*Default:*|None|
286+
|*Type:*|int|
279287

280288
- **production**
281289

@@ -529,6 +537,30 @@ the model will train for max_epochs regardless of validation error.|
529537
|*Default:*|scaffold|
530538
|*Type:*|str|
531539

540+
- **sampling_method**
541+
542+
|||
543+
|-|-|
544+
|*Description:*|The sampling method for addressing class imbalance in classification datasets. Options include 'undersampling' and 'SMOTE'.|
545+
|*Default:*|None|
546+
|*Type:*|str|
547+
548+
- **sampling_ratio**
549+
550+
|||
551+
|-|-|
552+
|*Description:*|The desired ratio of the minority class to the majority class after sampling (e.g., if str, 'minority', 'not minority'; if float, '0.2', '1.0'). |
553+
|*Default:*|auto|
554+
|*Type:*|str|
555+
556+
- **sampling_k_neighbors**
557+
558+
|||
559+
|-|-|
560+
|*Description:*|The number of nearest neighbors to consider when generating synthetic samples (e.g., 5, 7, 9). Specifically used for SMOTE sampling method.|
561+
|*Default:*|5|
562+
|*Type:*|int|
563+
532564
- **mtss\_num\_super\_scaffolds**
533565

534566
|||
@@ -605,6 +637,14 @@ the model will train for max_epochs regardless of validation error.|
605637
|*Description:*|type of transformation for the response column (defaults to "normalization") TODO: Not currently implemented|
606638
|*Default:*|normalization|
607639

640+
- **weight\_transform\_type**
641+
642+
|||
643+
|-|-|
644+
|*Description:*|type of transformation for class weights in a classification model loss function. Use the "balancing" option to offset the effect of imbalanced datasets. Works with NN, random forest and XGBoost models. |
645+
|*Default:*|None|
646+
|*Type:*|Choice|
647+
608648
- **transformer\_bucket**
609649

610650
|||
@@ -692,6 +732,20 @@ the model will train for max_epochs regardless of validation error.|
692732
|*Description:*|Minimum loss reduction required to make a further partition on a leaf node of the tree. Can be input as a comma separated list for hyperparameter search (e.g. '0.0,0.1,0.2')|
693733
|*Default:*|0.0|
694734

735+
- **xgb\_alpha**
736+
737+
|||
738+
|-|-|
739+
|*Description:*|L1 regularization term on weights. Increasing this value will make the model more conservative. Can be input as a comma separated list for hyperparameter search (e.g. '0.0,0.1,0.2')|
740+
|*Default:*|0.0|
741+
742+
- **xgb\_lambda**
743+
744+
|||
745+
|-|-|
746+
|*Description:*|L2 regularization term on weights. Increasing this value will make the model more conservative. Can be input as a comma separated list for hyperparameter search (e.g. '0.0,0.1,0.2')|
747+
|*Default:*|1.0|
748+
695749
- **xgb\_learning\_rate**
696750

697751
|||
@@ -710,7 +764,7 @@ the model will train for max_epochs regardless of validation error.|
710764

711765
|||
712766
|-|-|
713-
|*Description:*|Minimum sum of instance weight(hessian) needed in a child. Can be input as a comma separated list for hyperparameter search (e.g. '1.0,1.1,1.2')|
767+
|*Description:*|Minimum sum of instance weights (hessian) needed in a child. Can be input as a comma separated list for hyperparameter search (e.g. '1.0,1.1,1.2')|
714768
|*Default:*|1.0|
715769

716770
- **xgb\_n\_estimators**
@@ -1057,6 +1111,27 @@ tied to a specific model parameter. Only a subset of model parameters may be opt
10571111
|*Description:*|Search domain for NN model `layer_sizes` parameter in Bayesian Optimization. The format is `scheme\|num_layers\|parameters`, e.g. `uniformint\|3\|8,512`. Note that the number of layers (the number between the two \|) cannot be changed during optimization; if you want to try a different number of layers, run several optimizations.|
10581112
|*Default:*|None|
10591113

1114+
- **ls_ratio**
1115+
1116+
|||
1117+
|-|-|
1118+
|*Description:*|Alternative method to set search domain for NN model `layer_sizes` parameter in Bayesian Optimization by specifying layer_size/previous_layer_size ratios. The format is `scheme\|ratios`, e.g. `uniform\|0.1,0.9`; the number of layers and starting layer sizes are taken from the `ls` parameter.
1119+
|*Default:*|None|
1120+
1121+
- **wdp**
1122+
1123+
|||
1124+
|-|-|
1125+
|*Description:*|Search domain for NN model `weight_decay_penalty` parameter in Bayesian Optimization. The format is `scheme\|parameters`, e.g. `loguniform\|-6.908,-4.605`.
1126+
|*Default:*|None|
1127+
1128+
- **wdt**
1129+
1130+
|||
1131+
|-|-|
1132+
|*Description:*|Search domain for NN model `weight_decay_penalty_type` parameter in Bayesian Optimization. The format is `scheme\|parameters`, e.g. `choice\|l1,l2`.
1133+
|*Default:*|None|
1134+
10601135
- **rfe**
10611136

10621137
|||
@@ -1085,6 +1160,20 @@ tied to a specific model parameter. Only a subset of model parameters may be opt
10851160
|*Description:*|Search domain for XGBoost model `xgb_gamma` parameter in Bayesian Optimization. The format is `scheme\|parameters`, e.g. `loguniform\|-9.2,-4.6`.
10861161
|*Default:*|None|
10871162

1163+
- **xgba**
1164+
1165+
|||
1166+
|-|-|
1167+
|*Description:*|Search domain for XGBoost model `xgb_alpha` parameter in Bayesian Optimization. The format is `scheme\|parameters`, e.g. `uniform\|0,0.4`.
1168+
|*Default:*|None|
1169+
1170+
- **xgbb**
1171+
1172+
|||
1173+
|-|-|
1174+
|*Description:*|Search domain for XGBoost model `xgb_lambda` parameter in Bayesian Optimization. The format is `scheme\|parameters`, e.g. `uniform\|0,0.4`.
1175+
|*Default:*|None|
1176+
10881177
- **xgbl**
10891178

10901179
|||

atomsci/ddm/docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
# import atomsci.ddm
2929
#version = atomsci.ddm.__version__
3030
# The short X.Y version
31-
version = '1.6.3'
31+
version = '1.7.0'
3232
copyright = f'{datetime.datetime.now().year}, {author}'
3333

3434
# The full version, including alpha/beta/rc tags

atomsci/ddm/docs/source/tutorials/05_hyperopt.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ models to select the best hyperparameters"**.
235235

236236

237237
Examples of Other Parameter Sets
238-
*****************************
238+
********************************
239239

240240
Below are some parameters that can be used for **neural networks**,
241241
`XGBoost <https://en.wikipedia.org/wiki/XGBoost>`_ models,

atomsci/ddm/docs/source/tutorials/ampl_tutorials_intro.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ properties. We have created easy to follow tutorials that walk through the steps
1616
`AMPL <https://github.com/ATOMScience-org/AMPL>`_, curate a dataset, effectively train and evaluate a machine
1717
learning model, and use that model to make predictions.
1818

19+
In addition to our written tutorials, we now provide a series of video tutorials on our YouTube channel, `ATOMScience-org <https://www.youtube.com/channel/UCOF6zZ7ltGwopYCoOGIFM-w>`_. These videos are created to assist users in exploring and leveraging AMPL's robust capabilities.
20+
1921
End-to-End Modeling Pipeline Tutorial Series
2022
********************************************
2123

@@ -50,4 +52,4 @@ Although the tutorials are designed to be run in sequence, using an example data
5052
provided within `AMPL <https://github.com/ATOMScience-org/AMPL>`_, we have also provided copies of the intermediate files generated by each tutorial that are
5153
required by subsequent tutorials, so that you can run them in any order.
5254

53-
Also, if you have issues or questions about the tutorials, please create an issue `here <https://github.com/ATOMScience-org/AMPL/issues>`_.
55+
Also, if you have issues or questions about the tutorials, please create an issue `here <https://github.com/ATOMScience-org/AMPL/issues>`_.

0 commit comments

Comments
 (0)