Skip to content

Commit 9523786

Browse files
authored
Merge pull request #76 from grahamWroberts/main
added tree to pipeline
2 parents d6ddbb5 + d1f52ab commit 9523786

File tree

7 files changed

+1394
-0
lines changed

7 files changed

+1394
-0
lines changed

AFL/double_agent/TreePipeline.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from AFL.double_agent import *
2+
import numpy as np
3+
import pandas as pd
4+
from matplotlib import pyplot as plt
5+
import matplotlib
6+
#import tune_all_decisions as tad
7+
import itertools
8+
import joblib
9+
from io import BytesIO
10+
import xarray as xr
11+
import json
12+
import TreeHierarchy as te
13+
from sklearn.metrics import classification_report as cr
14+
from sklearn.metrics import root_mean_squared_error as RMSE
15+
from sklearn.metrics import mean_absolute_error as MAE
16+
from sklearn.metrics import mean_absolute_percentage_error as MAPE
17+
18+
#PipelineOp constructor for classification tree
19+
#The tree itself is defined in TreeHierarchy
20+
#This constructor follows the expected PipelineOp syntax
21+
# input_variable: the name of the input feature in the xarray
22+
# output_variable: the name of the variable to add/modify in the xarray dataset
23+
# model_definition: A dictionary containing an encoding of a TreeHierarchy object. The encoder is contained in TreeHierarchy.
24+
class ClassificationPipeline(PipelineOp):
    """PipelineOp wrapper around a TreeHierarchy classification tree.

    Parameters
    ----------
    input_variable : str
        Name of the input feature variable in the xarray dataset.
    output_variable : str
        Name of the variable to add/modify in the xarray dataset.
    model_definition : dict
        Dictionary encoding of a TreeHierarchy object; the encoder lives in
        TreeHierarchy.
    name : str
        Display name of this PipelineOp.
    """

    def __init__(self, input_variable, output_variable, model_definition, name="Classifier"):
        super().__init__(
            input_variable=input_variable,
            output_variable=output_variable,
            name=name,
        )
        # Rebuild the trained tree from its JSON-serializable encoding.
        self.classifier = te.json_decoder(model_definition)

    def set_classifier(self, classifier_instance):
        """Swap in an already-constructed classifier instance."""
        self.classifier = classifier_instance

    def calculate(self, dataset):
        """Predict a class label per sample and write it to ``output_variable``.

        The input data is log10-transformed before prediction — presumably the
        tree was trained on log-scaled curves (TODO confirm against training code).
        """
        features = self._get_variable(dataset)
        labels = self.classifier.predict(np.log10(features))
        dataset[self.output_variable] = ('sample', labels)
        return self
41+
42+
#PipelineOp constructor for a regressor
43+
#This constructor follows the expected PipelineOp syntax, with some important considerations
44+
# input_variable: the name of the input feature in the xarray
45+
# output_variable: the name of the variable to add/modify in the xarray dataset
46+
# key_variable: the name of the variable that contains morphology information in the xarray, could be ground_truth_labels, predicted_labels, etc.
47+
# morphology: the morphology that this model is trained on
48+
# model_definition: a dictionary containing a complete definition of a trained regression model; the encoder in TreeHierarchy also works for this
49+
#NOTE: Each regressor only works for one parameter for one morphology; if multiple morphologies share a parameter (i.e., radius is common to many morphologies), then they should each operate on the SAME output_variable.
50+
#Each RegressionPipeline will only modify output_variable where key_variable==morphology; place multiple PipelineOps in the same pipeline to perform regression over all parameters and morphologies.
51+
class RegressionPipeline(PipelineOp):
    """PipelineOp wrapper around a TreeHierarchy-decoded regression model.

    Each regressor handles one parameter for one morphology. If multiple
    morphologies share a parameter (e.g. radius), each should write to the
    SAME ``output_variable``; this op only modifies entries where
    ``key_variable == morphology``, so several RegressionPipelines placed in
    one pipeline together cover all parameters and morphologies.

    Parameters
    ----------
    input_variable : str
        Name of the input feature variable in the xarray dataset.
    output_variable : str
        Name of the variable to add/modify in the xarray dataset.
    key_variable : str
        Name of the variable containing morphology information (e.g.
        ground_truth_labels, predicted_labels).
    morphology : str
        The morphology this model is trained on.
    model_definition : dict
        Complete definition of a trained model; the TreeHierarchy encoder
        also works for this.
    name : str
        Display name of this PipelineOp.
        NOTE(review): the default "Classifier" appears copy-pasted from
        ClassificationPipeline; kept for backward compatibility.
    """

    def __init__(self, input_variable, output_variable, key_variable, morphology, model_definition, name="Classifier"):
        super().__init__(
            input_variable=input_variable,
            output_variable=output_variable,
            name=name,
        )
        self.key_variable = key_variable
        self.morphology = morphology
        self.regression = te.json_decoder(model_definition)

    def calculate(self, dataset):
        """Regress values for samples matching this op's morphology.

        Only entries where ``key_variable == morphology`` are written; all
        other entries are left untouched (or NaN if the output variable did
        not exist yet).
        """
        data = self._get_variable(dataset)
        key = dataset[self.key_variable].data
        # Select only the samples belonging to this op's morphology.
        # (Debug prints of np.unique(key)/self.morphology removed.)
        inds = np.where(np.equal(key, self.morphology))
        # log10-transform mirrors ClassificationPipeline — presumably matches
        # the training-time preprocessing (TODO confirm).
        predictions = self.regression.predict(np.log10(data[inds]))
        if self.output_variable in dataset.data_vars:
            # Another regressor may already have filled other morphologies.
            output = dataset[self.output_variable].data
        else:
            # First writer for this variable: initialize with NaN so samples
            # of other morphologies are visibly unfilled.
            output = np.nan * np.ones(data.shape[0])
        output[inds] = predictions
        dataset[self.output_variable] = ('sample', output)
        return self
76+

AFL/double_agent/data/classification_pipeline.json

Lines changed: 827 additions & 0 deletions
Large diffs are not rendered by default.
22 MB
Binary file not shown.

AFL/double_agent/data/example_tree_structure.json

Lines changed: 401 additions & 0 deletions
Large diffs are not rendered by default.
307 KB
Binary file not shown.

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ automation = [
9696
"requests",
9797
]
9898

99+
mlmodels = [
100+
"TreeHierarchy @ git+https://github.com/grahamRobertsW/TreeHierarchy"
101+
]
102+
99103
dev = [
100104
"black",
101105
"mypy",

tests/test_classifier_pipeline.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""
2+
Unit tests for the AFL.double_agent.PipelineOp module.
3+
"""
4+
5+
import pytest
6+
import numpy as np
7+
import xarray as xr
8+
import json
9+
import os
10+
11+
from tests.utils import MockPipelineOp
12+
from AFL.double_agent import TreePipeline as tp
13+
from AFL.double_agent import (Pipeline, LogLogTransform)
14+
from sklearn.svm import SVC
15+
from AFL.double_agent.data import (
16+
get_data_dir,
17+
list_datasets,
18+
load_dataset,
19+
example_dataset1,
20+
)
21+
from TreeHierarchy import (
22+
TreeHierarchy,
23+
json_decoder
24+
)
25+
26+
27+
@pytest.mark.unit
class TestClassificationPipeline:
    """Tests constructing a ClassificationPipeline from a tree JSON definition."""

    def test_classifier_creation(self):
        # Use a context manager so the file handle is closed (the original
        # used open(...).read() and leaked the handle); json.load reads
        # directly from the file object.
        with open(os.path.join(get_data_dir(), "example_tree_structure.json"), "r") as f:
            classification_def = json.load(f)
        with Pipeline() as P:
            LogLogTransform("SAS_curves", "log_sas_curves")
            pipe = tp.ClassificationPipeline("SAS_curves", "predicted_labels", classification_def)
        # The decoded classifier should be a three-level TreeHierarchy with an
        # SVC at each internal node.
        assert isinstance(pipe, tp.ClassificationPipeline)
        assert isinstance(pipe.classifier, TreeHierarchy)
        assert isinstance(pipe.classifier.left, TreeHierarchy)
        assert isinstance(pipe.classifier.right, TreeHierarchy)
        assert isinstance(pipe.classifier.left.left, TreeHierarchy)
        assert isinstance(pipe.classifier.left.right, TreeHierarchy)
        assert isinstance(pipe.classifier.right.left, TreeHierarchy)
        assert isinstance(pipe.classifier.right.right, TreeHierarchy)
        assert isinstance(pipe.classifier.entity, SVC)
        assert isinstance(pipe.classifier.left.entity, SVC)
        assert isinstance(pipe.classifier.right.entity, SVC)
47+
48+
@pytest.mark.unit
class TestClassificationPipelineLoaded:
    """Tests loading a saved ClassificationPipeline from a pipeline JSON file."""

    def test_classifier_load(self):
        save_path = os.path.join(get_data_dir(), "classification_pipeline.json")
        with Pipeline.read_json(str(save_path)) as P:
            # P[1] is the ClassificationPipeline op in the saved pipeline;
            # its classifier should round-trip as a three-level TreeHierarchy
            # with an SVC at each internal node.
            assert isinstance(P[1], tp.ClassificationPipeline)
            assert isinstance(P[1].classifier, TreeHierarchy)
            assert isinstance(P[1].classifier.left, TreeHierarchy)
            assert isinstance(P[1].classifier.right, TreeHierarchy)
            assert isinstance(P[1].classifier.left.left, TreeHierarchy)
            assert isinstance(P[1].classifier.left.right, TreeHierarchy)
            assert isinstance(P[1].classifier.right.left, TreeHierarchy)
            assert isinstance(P[1].classifier.right.right, TreeHierarchy)
            assert isinstance(P[1].classifier.entity, SVC)
            assert isinstance(P[1].classifier.left.entity, SVC)
            assert isinstance(P[1].classifier.right.entity, SVC)
68+
69+
@pytest.mark.unit
class TestClassificationPipelinePerformance:
    """Tests that a loaded pipeline reproduces the reference predictions."""

    def test_classifier_load(self):
        save_path = os.path.join(get_data_dir(), "classification_pipeline.json")
        data = load_dataset("example_classification_data")
        ref = load_dataset("reference_predictions")
        with Pipeline.read_json(str(save_path)) as P:
            out = P.calculate(data)
        # Predictions must match the stored reference exactly.
        np.testing.assert_array_equal(
            out["predicted_test_labels"].data,
            ref["reference_predictions"].data,
        )
83+
84+
85+
86+

0 commit comments

Comments
 (0)