
Commit 34c41e1

Unit test updates & black reformatting
1 parent: a98529c

7 files changed: +79 −78 lines changed


src/sasctl/pzmm/import_model.py

Lines changed: 17 additions & 18 deletions
@@ -160,17 +160,16 @@ def import_model(
     be registered in SAS Model Manager.

     Generation of the score code requires that the `input_data`, `predict_method`,
-    and `output_variables` arguments are supplied. Otherwise, a warning will be
+    and `score_metrics` arguments are supplied. Otherwise, a warning will be
     generated stating that no score code is being created.

     The following are generated by this function if a path is provided in the
     model_files argument:
-        * '*Score.py'
-            The Python score code file for the model.
-        * '*.zip'
-            The zip archive of the relevant model files. In Viya 3.5 the Python score
-            code is not present in this initial zip file.
-
+    * '*Score.py'
+        The Python score code file for the model.
+    * '*.zip'
+        The zip archive of the relevant model files. In Viya 3.5 the Python
+        score code is not present in this initial zip file.

     Parameters
     ----------
@@ -194,7 +193,7 @@ def import_model(
             sklearn.tree.DecisionTreeClassifier.predict_proba
         The default value is None.
     output_variables : string list, optional
-        The scoring output_variables for the model. For classification models, it is
+        The scoring score_metrics for the model. For classification models, it is
         assumed that the first value in the list represents the classification
         output. This function supports single and multi-class classification models.
         The default value is None
@@ -219,21 +218,21 @@ def import_model(
         Model details from an MLFlow model. This dictionary is created by the
         read_mlflow_model_file function. The default value is None.
     predict_threshold : float, optional
-        The prediction threshold for normalized probability output_variables. Values
+        The prediction threshold for normalized probability score_metrics. Values
         are expected to be between 0 and 1. The default value is None.
     target_values : list of strings, optional
         A list of target values for the target variable. This argument and the
-        output_variables argument dictate the handling of the predicted values from
+        score_metrics argument dictate the handling of the predicted values from
         the prediction method. The default value is None.
     kwargs : dict, optional
         Other keyword arguments are passed to the following function:
-            * sasctl.pzmm.ScoreCode.write_score_code(...,
-                binary_h2o_model=False,
-                binary_string=None,
-                model_file_name=None,
-                mojo_model=False,
-                statsmodels_model=False
-                )
+        * sasctl.pzmm.ScoreCode.write_score_code(...,
+            binary_h2o_model=False,
+            binary_string=None,
+            model_file_name=None,
+            mojo_model=False,
+            statsmodels_model=False
+        )

     Returns
     -------
@@ -252,7 +251,7 @@ def import_model(
     if input_data is None or not predict_method or not output_variables:
         warn(
             "The following arguments are required for the automatic generation of "
-            "score code: input_data, predict_method, output_variables."
+            "score code: input_data, predict_method, score_metrics."
         )
     if isinstance(model_files, dict):
         zip_io_file = zm.zip_files(model_files, model_prefix, is_viya4=False)
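
As context for the rename, a minimal usage sketch assuming the keyword interface described in the docstring above; the host, credentials, project name, and file path are hypothetical placeholders:

    # Sketch: import_model now documents `score_metrics` where the docstring
    # previously said `output_variables`. All names below are placeholders.
    import pandas as pd
    from sklearn.tree import DecisionTreeClassifier
    from sasctl import Session
    from sasctl.pzmm import ImportModel

    X = pd.DataFrame({"First": [0, 1, 0, 1], "Second": [1.0, 2.0, 3.0, 4.0]})
    y = pd.Series([0, 1, 0, 1], name="Target")
    model = DecisionTreeClassifier().fit(X, y)

    with Session("example.sas.com", "user", "password"):
        ImportModel.import_model(
            model_files="path/to/model_files",   # path or dict of model files
            model_prefix="TestModel",
            project="Test_Project",
            input_data=X,                        # required for score code
            predict_method=model.predict_proba,  # required for score code
            score_metrics=["Classification", "Probability"],  # required for score code
        )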

src/sasctl/pzmm/write_json_files.py

Lines changed: 5 additions & 5 deletions
@@ -333,7 +333,7 @@ def write_model_properties_json(
     elif isinstance(target_values, list) and len(target_values) == 2:
         model_function = "Classification"
         target_level = "BINARY"
-        target_event = target_values[0]
+        target_event = str(target_values[0])
         event_prob_var = f"P_{target_values[0]}"
     elif isinstance(target_values, list) and len(target_values) > 2:
         model_function = "Classification"
@@ -377,10 +377,10 @@ def write_model_properties_json(
         "trainTable": train_table if train_table else "",
         "trainCodeType": "Python",
         "algorithm": model_algorithm if model_algorithm else "",
-        "target_variable": target_variable if target_variable else "",
-        "target_event": target_event if target_event else "",
-        "target_level": target_level if target_level else "",
-        "event_prob_var": event_prob_var if event_prob_var else "",
+        "targetVariable": target_variable if target_variable else "",
+        "targetEvent": target_event if target_event else "",
+        "targetLevel": target_level if target_level else "",
+        "eventProbVar": event_prob_var if event_prob_var else "",
         "modeler": modeler if modeler else "",
         "tool": "Python 3",
         "toolVersion": python_version,

src/sasctl/tasks.py

Lines changed: 25 additions & 18 deletions
@@ -109,15 +109,15 @@ def _register_sklearn_35():


 def _register_sklearn_40(model, model_name, project_name, input_data, output_data=None):
-
     # TODO: if not sklearn, raise ValueError

     model_info = _sklearn_to_dict(model)

     with TemporaryDirectory() as folder:
-
         # Write model to a pickle file
-        pzmm.PickleModel.pickle_trained_model(model, model_name, folder)  # generates folder/name.pickle
+        pzmm.PickleModel.pickle_trained_model(
+            model, model_name, folder
+        )  # generates folder/name.pickle

         # Create a JSON file containing model input fields
         pzmm.JSONFiles.write_var_json(input_data, is_input=True, json_path=folder)
@@ -131,27 +131,34 @@ def _register_sklearn_40(model, model_name, project_name, input_data, output_data=None):
             output_fields.columns = ["EM_CLASSIFICATION"]
         else:
             output_fields.name = "EM_CLASSIFICATION"
-        pzmm.JSONFiles.write_var_json(output_fields, is_input=False, json_path=folder)
+        pzmm.JSONFiles.write_var_json(
+            output_fields, is_input=False, json_path=folder
+        )
     else:
-        pzmm.JSONFiles.write_var_json(output_data, is_input=False, json_path=folder)
+        pzmm.JSONFiles.write_var_json(
+            output_data, is_input=False, json_path=folder
+        )
     # target_variable
     # target_event (e.g 1 for binary)
     # num_target_event
     # event_prob

     # TODO: allow passing description in register_model()

-    pzmm.JSONFiles.write_model_properties_json(model_name,
-                                               target_event=None,
-                                               target_variable=None,
-                                               num_target_categories=1,
-                                               model_desc=model_info["description"],
-                                               model_function=model_info["function"],
-                                               model_type=model_info["algorithm"],
-                                               json_path=folder
-                                               )
+    pzmm.JSONFiles.write_model_properties_json(
+        model_name,
+        target_values=None,
+        target_variable=None,
+        num_target_categories=1,
+        model_desc=model_info["description"],
+        model_function=model_info["function"],
+        model_algorithm=model_info["algorithm"],
+        json_path=folder,
+    )

-    pzmm.JSONFiles.write_file_metadata_json(model_name, json_path=folder, is_h2o_model=False)
+    pzmm.JSONFiles.write_file_metadata_json(
+        model_name, json_path=folder, is_h2o_model=False
+    )

     predict_method = (
         "{}.predict_proba({})"
@@ -616,8 +623,8 @@ def publish_model(

     See Also
     --------
-    :meth:`model_management.publish_model <.ModelManagement.publish_model>`
-    :meth:`model_publish.publish_model <.ModelPublish.publish_model>`
+    model_management.publish_model
+    model_publish.publish_model


     .. versionchanged:: 1.1.0
@@ -735,7 +742,7 @@ def update_model_performance(data, model, label, refresh=True):

     See Also
     --------
-    :meth:`model_management.create_performance_definition <.ModelManagement.create_performance_definition>`
+    model_management.create_performance_definition

     .. versionadded:: v1.3

src/sasctl/utils/astore.py

Lines changed: 1 addition & 1 deletion
@@ -311,7 +311,7 @@ def get_variable_properties(var):
         "interval": "",
         "num": "decimal",
         "character": "string",
-        "varchar": "string"
+        "varchar": "string",
     }

     meta = {"name": var.Name.strip(), "length": int(var.Length)}

tests/unit/test_model_parameters.py

Lines changed: 4 additions & 3 deletions
@@ -89,19 +89,20 @@ def test_bad_model_hyperparameters(self, bad_model):
             mp.generate_hyperparameters(bad_model, self.MODEL_NAME, Path(tmp_dir.name))

     def test_update_json(self):
-        from sasctl.pzmm.model_parameters import _update_json
+        from sasctl.pzmm.model_parameters import ModelParameters as mp

         # ensure that only relevant rows are added to hyperparameter json

         input_json = copy.deepcopy(self.TESTJSON)
         input_kpis = copy.deepcopy(self.KPIS)
         assert (
-            _update_json(self.MODELS[1]["id"], input_json, input_kpis) == self.TESTJSON
+            mp._update_json(self.MODELS[1]["id"], input_json, input_kpis)
+            == self.TESTJSON
         )

         input_json = copy.deepcopy(self.TESTJSON)
         input_kpis = copy.deepcopy(self.KPIS)
-        updated_json = _update_json(self.MODELS[0]["id"], input_json, input_kpis)
+        updated_json = mp._update_json(self.MODELS[0]["id"], input_json, input_kpis)

         pd.testing.assert_frame_equal(input_kpis, self.KPIS)
         assert "hyperparameters" in updated_json

tests/unit/test_write_json_files.py

Lines changed: 9 additions & 5 deletions
@@ -218,17 +218,21 @@ def test_write_model_properties_json():
     prop_dict = jf.write_model_properties_json(
         model_name="Test_Model",
         target_variable="BAD",
-        target_values=[4, 3, 1, 4],
+        target_values=[4, 3, 1, 5],
     )
-    assert json.loads(prop_dict["ModelProperties.json"])["target_level"] == "NOMINAL"
+    assert json.loads(prop_dict["ModelProperties.json"])["targetLevel"] == "NOMINAL"
     assert json.loads(prop_dict["ModelProperties.json"])["properties"] == [
-        {"name": "multiclass_target_events", "value": "4, 3, 1, 4", "type": "string"},
-        {"name": "multiclass_proba_variables", "value": "A, B, C, D", "type": "string"},
+        {"name": "multiclass_target_events", "value": "4, 3, 1, 5", "type": "string"},
+        {
+            "name": "multiclass_proba_variables",
+            "value": "P_4, P_3, P_1, P_5",
+            "type": "string",
+        },
     ]

     with pytest.warns():
         prop_dict = jf.write_model_properties_json(
-            model_name="Test_Model", target_variable="BAD", model_desc="a" * 1000
+            model_name="Test_Model", target_variable="BAD", model_desc="a" * 10000
         )
     assert len(json.loads(prop_dict["ModelProperties.json"])["description"]) <= 1024

tests/unit/test_write_score_code.py

Lines changed: 18 additions & 28 deletions
@@ -634,34 +634,27 @@ def test_predictions_to_metrics():
     """
     with patch("sasctl.pzmm.ScoreCode._no_targets_no_thresholds") as func:
         metrics = ["Classification"]
-        sc._predictions_to_metrics(metrics)
-        func.assert_called_once_with("Classification", False)
+        returns = [1]
+        sc._predictions_to_metrics(metrics, returns)
+        func.assert_called_once_with("Classification", returns, False)

     with patch("sasctl.pzmm.ScoreCode._nonbinary_targets") as func:
         target_values = ["A", "B", 5]
-        sc._predictions_to_metrics(metrics, target_values)
-        func.assert_called_once_with("Classification", target_values, False)
+        sc._predictions_to_metrics(metrics, returns, target_values)
+        func.assert_called_once_with("Classification", target_values, returns, False)

     with patch("sasctl.pzmm.ScoreCode._binary_target") as func:
         metrics = ["Classification", "Probability"]
-        target_values = ["1"]
-        sc._predictions_to_metrics(metrics, target_values)
-        func.assert_called_once_with(metrics, None, False)
-
-    with pytest.raises(
-        ValueError,
-        match="For non-binary target variables, please provide at least two target "
-        "values.",
-    ):
-        target_values = ["2"]
-        sc._predictions_to_metrics(metrics, target_values)
+        target_values = ["1", "0"]
+        sc._predictions_to_metrics(metrics, returns, target_values)
+        func.assert_called_once_with(metrics, ["1", "0"], returns, None, False)

     with pytest.raises(
         ValueError,
         match="A threshold was provided to interpret the prediction results, however "
         "a target value was not, therefore, a valid output cannot be generated.",
     ):
-        sc._predictions_to_metrics(metrics, predict_threshold=0.7)
+        sc._predictions_to_metrics(metrics, returns, predict_threshold=0.7)


 def test_input_var_lists():
@@ -692,6 +685,7 @@ def test_check_viya_version(mock_version, mock_get_model):
     - Viya 4
     - No connection
     """
+    current_session(None)
     mock_version.return_value = None
     model = {"name": "Test", "id": "abc123"}
     with pytest.warns():
@@ -744,29 +738,28 @@ def test_write_score_code(score_code_mocks):
     score_code_mocks["_viya35_score_code_import"].return_value = ("MAS", "CAS")
     score_code_mocks["_check_valid_model_prefix"].return_value = "TestModel"

+    # No binary string or model file provided
     with pytest.raises(ValueError):
         sc.write_score_code(
             "TestModel",
             pd.DataFrame(data=[["A", 1], ["B", 2]], columns=["First", "Second"]),
-            predict_proba,
-            ["C", "P"],
+            [predict_proba, []],
         )

+    # Binary string and model file provided
     with pytest.raises(ValueError):
         sc.write_score_code(
             "TestModel",
             pd.DataFrame(data=[["A", 1], ["B", 2]], columns=["First", "Second"]),
-            predict_proba,
-            ["C", "P"],
+            [predict_proba, []],
             model_file_name="model.pickle",
             binary_string=b"Binary model string.",
         )

     sc.write_score_code(
         "TestModel",
         pd.DataFrame(data=[["A", 1], ["B", 2]], columns=["First", "Second"]),
-        predict_proba,
-        ["C", "P"],
+        [predict_proba, []],
         model_file_name="model.pickle",
     )
     score_code_mocks["_viya4_model_load"].assert_called_once()
@@ -775,17 +768,15 @@ def test_write_score_code(score_code_mocks):
     sc.write_score_code(
         "TestModel",
         pd.DataFrame(data=[["A", 1], ["B", 2]], columns=["First", "Second"]),
-        predict_proba,
-        ["C", "P"],
+        [predict_proba, []],
         model_file_name="model.pickle",
     )
     score_code_mocks["_viya35_model_load"].assert_called_once()

     output_dict = sc.write_score_code(
         "TestModel",
         pd.DataFrame(data=[["A", 1], ["B", 2]], columns=["First", "Second"]),
-        predict_proba,
-        ["C", "P"],
+        [predict_proba, []],
         binary_string=b"Binary model string.",
     )
     assert "TestModel_score.py" in output_dict
@@ -796,8 +787,7 @@ def test_write_score_code(score_code_mocks):
     sc.write_score_code(
         "TestModel",
         pd.DataFrame(data=[["A", 1], ["B", 2]], columns=["First", "Second"]),
-        predict_proba,
-        ["C", "P"],
+        [predict_proba, []],
         score_code_path=Path(tmp_dir.name),
         binary_string=b"Binary model string.",
     )
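
For reference, the call shape these tests now exercise: the predict method and its example return values travel together as a single [predict_method, returns] list, replacing the separate predict_proba and output-variables arguments. A hedged sketch, with a stub predict method as placeholder, mirroring the mocked tests rather than a full end-to-end run:

    import pandas as pd
    from sasctl.pzmm import ScoreCode as sc

    input_df = pd.DataFrame(data=[["A", 1], ["B", 2]], columns=["First", "Second"])

    def predict_proba(data):
        # Stand-in for a real model's predict method.
        return [[0.3, 0.7]] * len(data)

    output_dict = sc.write_score_code(
        "TestModel",
        input_df,
        [predict_proba, []],                 # [predict method, example returns]
        binary_string=b"Binary model string.",
    )
    assert "TestModel_score.py" in output_dict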
