ATOMScience-org
diff --git a/‎.github/workflows/pytest.yml‎
Lines changed: 9 additions & 1 deletion b/‎.github/workflows/pytest.yml‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎Makefile‎
Lines changed: 13 additions & 13 deletions b/‎Makefile‎
Lines changed: 13 additions & 13 deletions
diff --git a/‎README.md‎
Lines changed: 10 additions & 2 deletions b/‎README.md‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎VERSION‎
Lines changed: 1 addition & 1 deletion b/‎VERSION‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎atomsci/ddm/docs/PARAMETERS.md‎
Lines changed: 90 additions & 1 deletion b/‎atomsci/ddm/docs/PARAMETERS.md‎
Lines changed: 90 additions & 1 deletion
diff --git a/‎atomsci/ddm/docs/source/conf.py‎
Lines changed: 1 addition & 1 deletion b/‎atomsci/ddm/docs/source/conf.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎atomsci/ddm/docs/source/tutorials/05_hyperopt.rst‎
Lines changed: 1 addition & 1 deletion b/‎atomsci/ddm/docs/source/tutorials/05_hyperopt.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎atomsci/ddm/docs/source/tutorials/ampl_tutorials_intro.rst‎
Lines changed: 3 additions & 1 deletion b/‎atomsci/ddm/docs/source/tutorials/ampl_tutorials_intro.rst‎
Lines changed: 3 additions & 1 deletion
@@ -1,6 +1,13 @@
 name: tests
 
-on: [push, pull_request]
+on: 
+    push: 
+      branches: ["**"]
+    pull_request:
+      types:
+        - opened
+        - reopened
+        - ready_for_review
 
 jobs:
   pytest-unit:
@@ -40,6 +47,7 @@ jobs:
       - name: pytest
         run: |
           # python3.9 -m pytest --capture=sys --capture=fd --cov=atomsci/ -vv atomsci/ddm/test/unit
+          python3.9 -m pytest --capture=sys --capture=fd --cov=atomsci/ -vv atomsci/modac/test/unit
           cd atomsci/ddm/test/unit && python3.9 -m pytest -n 2 --capture=sys --capture=fd --cov=atomsci -vv
         env:
           ENV: test
 
@@ -24,11 +24,11 @@ endif
 # Release version
 VERSION=$(shell cat VERSION)
 
-# If ENV is prod, we use VERSION for the tag, otherwise use PLATFORM
+# If ENV is prod, tag the image with VERSION and SUBTAG; otherwise tag it with ENV and PLATFORM
 ifeq ($(ENV), prod)
     TAG = v$(VERSION)-$(SUBTAG)
 else
-    TAG = $(PLATFORM)-$(ENV)
+    TAG = $(ENV)-$(PLATFORM)
 endif
 
 # IMAGE REPOSITORY
@@ -51,7 +51,7 @@ WORK_DIR ?= work
 
 # Load Docker image
 load-docker:
-	docker load < ampl-$(PLATFORM)-$(ENV).tar.gz
+	docker load < ampl-$(TAG).tar.gz
 
 # Pull Docker image
 pull-docker:
@@ -63,12 +63,12 @@ push-docker:
 
 # Save Docker image
 save-docker:
-	docker save $(IMAGE_REPO):$(PLATFORM)-$(ENV) | gzip > ampl-$(PLATFORM)-$(ENV).tar.gz
+	docker save $(IMAGE_REPO):$(TAG) | gzip > ampl-$(TAG).tar.gz
 
 # Build Docker image
 build-docker:
 	@echo "Building Docker image for $(PLATFORM)"
-	docker buildx build -t $(IMAGE_REPO):$(PLATFORM)-$(ENV) --build-arg ENV=$(ENV) $(PLATFORM_ARG) --load -f Dockerfile.$(PLATFORM) .
+	docker buildx build -t $(IMAGE_REPO):$(TAG) --build-arg ENV=$(ENV) $(PLATFORM_ARG) --load -f Dockerfile.$(PLATFORM) .
 
 install: install-system
 
@@ -95,12 +95,12 @@ ifdef host
 	  $(GPU_ARG) \
 		--hostname $(host) \
 		--privileged \
-		-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(PLATFORM)-$(ENV) \
+		-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(TAG) \
 		/bin/bash -l -c "jupyter-notebook --ip=0.0.0.0 --no-browser --allow-root --port=$(JUPYTER_PORT)"
 else
 	docker run -p $(JUPYTER_PORT):$(JUPYTER_PORT) \
 		$(GPU_ARG) \
-		-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(PLATFORM)-$(ENV) \
+		-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(TAG) \
 		/bin/bash -l -c "jupyter-notebook --ip=0.0.0.0 --no-browser --allow-root --port=$(JUPYTER_PORT)"
 endif
 
@@ -109,34 +109,34 @@ endif
 jupyter-lab:
 	@echo "Starting Jupyter Lab"
 	docker run -p $(JUPYTER_PORT):$(JUPYTER_PORT) \
-		-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(PLATFORM)-$(ENV) \
+		-v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(TAG) \
 		/bin/bash -l -c "jupyter-lab --ip=0.0.0.0 --allow-root --port=$(JUPYTER_PORT)"
 
 # Run pytest
 pytest: pytest-unit pytest-integrative
 
 pytest-integrative:
 	@echo "Running integrative tests"
-	docker run -v $(shell pwd)/$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(PLATFORM)-$(ENV) \
+	docker run -v $(shell pwd)/$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(TAG) \
 			/bin/bash -l -c "cd atomsci/ddm/test/integrative && ./integrative_batch_tests.sh"
 
 pytest-unit:
 	@echo "Running unit tests"
 	docker run -v $(shell pwd)/$(WORK_DIR):/$(WORK_DIR) $(IMAGE_REPO):$(TAG) \
-			/bin/bash -l -c "cd atomsci/ddm/test/unit && python3.9 -m pytest --capture=sys --capture=fd --cov=atomsci -vv"
+		       /bin/bash -l -c "cd atomsci/ddm/test/unit && python3.9 -m pytest --capture=sys --capture=fd --cov=atomsci -vv"
 
 # Run ruff linter
 ruff:
 	@echo "Running ruff"
-	docker run -it $(IMAGE_REPO):$(PLATFORM)-$(ENV) /bin/bash -l -c "ruff check ."
+	docker run -it $(IMAGE_REPO):$(TAG) /bin/bash -l -c "ruff check ."
 
 # Run ruff linter with fix
 ruff-fix:
 	@echo "Running ruff with fix"
-	docker run -it $(IMAGE_REPO):$(PLATFORM)-$(ENV) /bin/bash -l -c "ruff check . --fix"
+	docker run -it $(IMAGE_REPO):$(TAG) /bin/bash -l -c "ruff check . --fix"
 
 shell:
-	docker run -v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) -it $(IMAGE_REPO):$(PLATFORM)-$(ENV) /bin/bash
+	docker run -v $(shell pwd)/../$(WORK_DIR):/$(WORK_DIR) -it $(IMAGE_REPO):$(TAG) /bin/bash
 
 # Setup virtual environment and install dependencies
 setup:
 
@@ -15,10 +15,11 @@ An open-source, end-to-end software pipeline for data curation, model building,
 
 The ATOM Modeling PipeLine (AMPL) extends the functionality of DeepChem and supports an array of machine learning and molecular featurization tools to predict key potency, safety and pharmacokinetic-relevant parameters. AMPL has been benchmarked on a large collection of pharmaceutical datasets covering a wide range of parameters. This is a living software project with active development. Check back for continued updates. Feedback is welcomed and appreciated, and the project is open to contributions! An [article describing the AMPL project](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.9b01053) was published in JCIM. The AMPL pipeline documentation is available [here](https://ampl.readthedocs.io/en/latest/pipeline.html).
 
+Check out our new tutorial series that walks through AMPL's end-to-end modeling pipeline to build a machine learning model! View them in our [docs](https://ampl.readthedocs.io/en/latest/) or as Jupyter notebooks in our [repo](https://github.com/ATOMScience-org/AMPL/tree/master/atomsci/ddm/examples/tutorials).
 
-![Static Badge](https://img.shields.io/badge/Announcement-1.6.1-blue)
+![Static Badge](https://img.shields.io/badge/Announcement-1.7.0-blue)
 
-Check out our new tutorial series that walks through AMPL's end-to-end modeling pipeline to build a machine learning model! View them in our [docs](https://ampl.readthedocs.io/en/latest/) or as Jupyter notebooks in our [repo](https://github.com/ATOMScience-org/AMPL/tree/master/atomsci/ddm/examples/tutorials).
+In addition to our written tutorials, we now provide a series of video tutorials on our YouTube channel, [ATOMScience-org](https://www.youtube.com/channel/UCOF6zZ7ltGwopYCoOGIFM-w). These videos are created to assist users in exploring and leveraging AMPL's robust capabilities.
 
 ---
 ## Table of contents
@@ -113,6 +114,12 @@ export PYTORCH_HIP_ALLOC_CONF=gargage_collection_threshold:0.9,max_split_size_mb
 export TF_FORCE_GPU_ALLOW_GROWTH=true
 ```
 
+- Install pytest and the plotting packages used for development and testing.
+
+```bash
+cd AMPL/pip
+pip install -r dev_requirements.txt
+```
 #### 6. *(Optional) LLNL LC only*: if you use [model_tracker](https://ampl.readthedocs.io/en/latest/pipeline.html#module-pipeline.model_tracker), install atomsci.clients
 ```bash
 # LLNL only: required for ATOM model_tracker
@@ -145,6 +152,7 @@ cd AMPL/pip
 # If using CUDA:
 # module load cuda/11.8                         
 pip install -r cpu_requirements.txt    # install cpu_requirements.txt OR cuda_requirements.txt  
+pip install -r dev_requirements.txt    # install pytest, plotting packages.
 
 # LLNL only: required for ATOM model_tracker
 # pip install -r clients_requirements.txt
 
@@ -1 +1 @@
-1.6.3
+1.7.0
@@ -276,6 +276,14 @@ The AMPL pipeline contains many parameters and options to fit models and make pr
 |*Description:*|True/False flag for setting verbosity|
 |*Default:*|FALSE|
 |*Type:*|Bool|
+
+- **seed**  
+  
+|||
+|-|-|
+|*Description:*|Seed used to initialize the random number generator so that results are reproducible. If None (the default), a random seed is generated.|
+|*Default:*|None|
+|*Type:*|int|
 
 - **production**  
 
@@ -529,6 +537,30 @@ the model will train for max_epochs regardless of validation error.|
 |*Default:*|scaffold|
 |*Type:*|str|
 
+- **sampling_method**  
+  
+|||
+|-|-|
+|*Description:*|The sampling method for addressing class imbalance in classification datasets. Options include 'undersampling' and 'SMOTE'.|
+|*Default:*|None|
+|*Type:*|str|
+
+- **sampling_ratio**  
+  
+|||
+|-|-|
+|*Description:*|The desired ratio of the minority class to the majority class after sampling (e.g., if str, 'minority', 'not minority'; if float, '0.2', '1.0'). |
+|*Default:*|auto|
+|*Type:*|str|
+
+- **sampling_k_neighbors**  
+  
+|||
+|-|-|
+|*Description:*|The number of nearest neighbors to consider when generating synthetic samples (e.g., 5, 7, 9). Specifically used for SMOTE sampling method.|
+|*Default:*|5|
+|*Type:*|int|
+
 - **mtss\_num\_super\_scaffolds**  
 
 |||
@@ -605,6 +637,14 @@ the model will train for max_epochs regardless of validation error.|
 |*Description:*|type of transformation for the response column (defaults to "normalization") TODO: Not currently implemented|
 |*Default:*|normalization|
 
+- **weight\_transform\_type**  
+  
+|||
+|-|-|
+|*Description:*|type of transformation for class weights in a classification model loss function. Use the "balancing" option to offset the effect of imbalanced datasets. Works with NN, random forest and XGBoost models. |
+|*Default:*|None|
+|*Type:*|Choice|
+  
 - **transformer\_bucket**  
 
 |||
@@ -692,6 +732,20 @@ the model will train for max_epochs regardless of validation error.|
 |*Description:*|Minimum loss reduction required to make a further partition on a leaf node of the tree. Can be input as a comma separated list for hyperparameter search (e.g. '0.0,0.1,0.2')|
 |*Default:*|0.0|
 
+- **xgb\_alpha**  
+  
+|||
+|-|-|
+|*Description:*|L1 regularization term on weights. Increasing this value will make the model more conservative. Can be input as a comma separated list for hyperparameter search (e.g. '0.0,0.1,0.2')|
+|*Default:*|0.0|
+  
+- **xgb\_lambda**  
+  
+|||
+|-|-|
+|*Description:*|L2 regularization term on weights. Increasing this value will make the model more conservative. Can be input as a comma separated list for hyperparameter search (e.g. '0.0,0.1,0.2')|
+|*Default:*|1.0|
+  
 - **xgb\_learning\_rate**  
 
 |||
@@ -710,7 +764,7 @@ the model will train for max_epochs regardless of validation error.|
 
 |||
 |-|-|
-|*Description:*|Minimum sum of instance weight(hessian) needed in a child. Can be input as a comma separated list for hyperparameter search (e.g. '1.0,1.1,1.2')|
+|*Description:*|Minimum sum of instance weights (hessian) needed in a child. Can be input as a comma separated list for hyperparameter search (e.g. '1.0,1.1,1.2')|
 |*Default:*|1.0|
 
 - **xgb\_n\_estimators**  
@@ -1057,6 +1111,27 @@ tied to a specific model parameter. Only a subset of model parameters may be opt
 |*Description:*|Search domain for NN model `layer_sizes` parameter in Bayesian Optimization. The format is `scheme\|num_layers\|parameters`, e.g. `uniformint\|3\|8,512`. Note that the number of layers (the number between the two \|) cannot be changed during optimization; if you want to try a different number of layers, run several optimizations.
 |*Default:*|None|
 
+- **ls_ratio**  
+  
+|||
+|-|-|
+|*Description:*|Alternative method to set search domain for NN model `layer_sizes` parameter in Bayesian Optimization by specifying layer_size/previous_layer_size ratios. The format is `scheme\|ratios`, e.g. `uniform\|0.1,0.9`; the number of layers and starting layer sizes are taken from the `ls` parameter.
+|*Default:*|None|
+
+- **wdp**  
+  
+|||
+|-|-|
+|*Description:*|Search domain for NN model `weight_decay_penalty` parameter in Bayesian Optimization. The format is `scheme\|parameters`, e.g. `loguniform\|-6.908,-4.605`.
+|*Default:*|None|
+
+- **wdt**  
+  
+|||
+|-|-|
+|*Description:*|Search domain for NN model `weight_decay_penalty_type` parameter in Bayesian Optimization. The format is `scheme\|parameters`, e.g. `choice\|l1,l2`.
+|*Default:*|None|
+
 - **rfe**  
 
 |||
@@ -1085,6 +1160,20 @@ tied to a specific model parameter. Only a subset of model parameters may be opt
 |*Description:*|Search domain for XGBoost model `xgb_gamma` parameter in Bayesian Optimization. The format is `scheme\|parameters`, e.g. `loguniform\|-9.2,-4.6`.
 |*Default:*|None|
 
+- **xgba**  
+  
+|||
+|-|-|
+|*Description:*|Search domain for XGBoost model `xgb_alpha` parameter in Bayesian Optimization. The format is `scheme\|parameters`, e.g. `uniform\|0,0.4`.
+|*Default:*|None|
+
+- **xgbb**  
+  
+|||
+|-|-|
+|*Description:*|Search domain for XGBoost model `xgb_lambda` parameter in Bayesian Optimization. The format is `scheme\|parameters`, e.g. `uniform\|0,0.4`.
+|*Default:*|None|
+
 - **xgbl**  
 
 |||
 
@@ -28,7 +28,7 @@
 # import atomsci.ddm
 #version = atomsci.ddm.__version__
 # The short X.Y version
-version = '1.6.3'
+version = '1.7.0'
 copyright = f'{datetime.datetime.now().year}, {author}'
 
 # The full version, including alpha/beta/rc tags
 
@@ -235,7 +235,7 @@ models to select the best hyperparameters"**.
 
 
 Examples of Other Parameter Sets
-*****************************
+********************************
 
 Below are some parameters that can be used for **neural networks**,
 `XGBoost <https://en.wikipedia.org/wiki/XGBoost>`_ models,
 
@@ -16,6 +16,8 @@ properties. We have created easy to follow tutorials that walk through the steps
 `AMPL <https://github.com/ATOMScience-org/AMPL>`_, curate a dataset, effectively train and evaluate a machine 
 learning model, and use that model to make predictions.
 
+In addition to our written tutorials, we now provide a series of video tutorials on our YouTube channel, `ATOMScience-org <https://www.youtube.com/channel/UCOF6zZ7ltGwopYCoOGIFM-w>`_.  These videos are created to assist users in exploring and leveraging AMPL's robust capabilities.
+
 End-to-End Modeling Pipeline Tutorial Series
 ********************************************
 
@@ -50,4 +52,4 @@ Although the tutorials are designed to be run in sequence, using an example data
 provided within `AMPL <https://github.com/ATOMScience-org/AMPL>`_, we have also provided copies of the intermediate files generated by each tutorial that are 
 required by subsequent tutorials, so that you can run them in any order.
 
-Also, if you have issues or questions about the tutorials, please create an issue `here <https://github.com/ATOMScience-org/AMPL/issues>`_.
+Also, if you have issues or questions about the tutorials, please create an issue `here <https://github.com/ATOMScience-org/AMPL/issues>`_.