added parameters class_for_ir_statistics, attribute_id, pred_target_column to the constructors of the SimpleExperiment class and its derived classes; these parameters are used to configure the ClassifierSplitEvaluator accordingly

fracpete · fracpete · commit 69b0934a12d0 · 2022-06-09T09:47:59.000+12:00
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -10,6 +10,8 @@ Changelog
   train/test tuples as used by cross-validation
 - the `Tester` class (module: `weka.experiments`) now has an option to swap columns/rows for comparing
   datasets rather than classifiers
+- the `SimpleExperiment` class and derived classes (module: `weka.experiments`) now accept the additional
+  constructor parameters `class_for_ir_statistics`, `attribute_id` and `pred_target_column`
 - ...
 
 
diff --git a/doc/source/examples.rst b/doc/source/examples.rst
@@ -720,6 +720,14 @@ Here is an example for performing a cross-validated classification experiment:
    print(tester.multi_resultset_full(1, comparison_col))
 
 
+Other parameters that can be supplied to the constructor of the `SimpleCrossValidationExperiment` or
+`SimpleRandomSplitExperiment` classes:
+
+* `class_for_ir_statistics` - the index of the class label to use for computing IR statistics like AUC
+* `attribute_id` - the 0-based index of the attribute that identifies rows
+* `pred_target_column` - for outputting the predictions and ground truth in separate columns in case of classification, e.g., for calculating confusion matrices manually afterwards
+
+
 And a setup for performing regression experiments on random splits on the datasets:
 
 .. code-block:: python
@@ -753,6 +761,15 @@ And a setup for performing regression experiments on random splits on the datase
    print(tester.multi_resultset_full(0, comparison_col))
 
 
+The `Tester` class allows you to swap columns and rows, therefore comparing datasets rather than classifiers:
+
+.. code-block:: python
+
+   tester = Tester(classname="weka.experiment.PairedCorrectedTTester")
+   tester.swap_rows_and_cols = True
+   tester.resultmatrix = matrix
+
+
 Partial classnames
 ------------------
 
diff --git a/python/weka/experiments.py b/python/weka/experiments.py
@@ -53,7 +53,8 @@ class SimpleExperiment(OptionHandler):
     http://weka.wikispaces.com/Using+the+Experiment+API
     """
 
-    def __init__(self, datasets, classifiers, jobject=None, classification=True, runs=10, result=None):
+    def __init__(self, datasets, classifiers, jobject=None, classification=True, runs=10, result=None,
+                 class_for_ir_statistics=0, attribute_id=-1, pred_target_column=False):
         """
         Initializes the experiment.
 
@@ -69,6 +70,12 @@ def __init__(self, datasets, classifiers, jobject=None, classification=True, run
         :type runs: int
         :param result: the filename of the file to store the results in
         :type result: str
+        :param class_for_ir_statistics: the class label index to use for IR statistics (classification only)
+        :type class_for_ir_statistics: int
+        :param attribute_id: the 0-based index of the attribute identifying instances (classification only)
+        :type attribute_id: int
+        :param pred_target_column: whether to store the predicted and target columns as well (classification only)
+        :type pred_target_column: bool
         """
 
         if not jobject is None:
@@ -81,6 +88,9 @@ def __init__(self, datasets, classifiers, jobject=None, classification=True, run
         self.datasets = datasets[:]
         self.classifiers = classifiers[:]
         self.result = result
+        self.class_for_ir_statistics = class_for_ir_statistics
+        self.attribute_id = attribute_id
+        self.pred_target_column = pred_target_column
         super(SimpleExperiment, self).__init__(jobject=jobject)
 
     def configure_splitevaluator(self):
@@ -92,8 +102,12 @@ def configure_splitevaluator(self):
         """
         if self.classification:
             speval = javabridge.make_instance("weka/experiment/ClassifierSplitEvaluator", "()V")
+            javabridge.call(speval, "setClassForIRStatistics", "(I)V", self.class_for_ir_statistics)
+            javabridge.call(speval, "setAttributeID", "(I)V", self.attribute_id)
+            javabridge.call(speval, "setPredTargetColumn", "(Z)V", self.pred_target_column)
         else:
             speval = javabridge.make_instance("weka/experiment/RegressionSplitEvaluator", "()V")
+
         classifier = javabridge.call(speval, "getClassifier", "()Lweka/classifiers/Classifier;")
         return speval, classifier
 
@@ -221,7 +235,8 @@ class SimpleCrossValidationExperiment(SimpleExperiment):
     Performs a simple cross-validation experiment. Can output the results either in ARFF or CSV.
     """
 
-    def __init__(self, datasets, classifiers, classification=True, runs=10, folds=10, result=None):
+    def __init__(self, datasets, classifiers, classification=True, runs=10, folds=10, result=None,
+                 class_for_ir_statistics=0, attribute_id=-1, pred_target_column=False):
         """
         Initializes the experiment.
 
@@ -237,6 +252,12 @@ def __init__(self, datasets, classifiers, classification=True, runs=10, folds=10
         :type folds: int
         :param result: the filename of the file to store the results in
         :type result: str
+        :param class_for_ir_statistics: the class label index to use for IR statistics (classification only)
+        :type class_for_ir_statistics: int
+        :param attribute_id: the 0-based index of the attribute identifying instances (classification only)
+        :type attribute_id: int
+        :param pred_target_column: whether to store the predicted and target columns as well (classification only)
+        :type pred_target_column: bool
         """
 
         if runs < 1:
@@ -252,7 +273,9 @@ def __init__(self, datasets, classifiers, classification=True, runs=10, folds=10
 
         super(SimpleCrossValidationExperiment, self).__init__(
             classification=classification, runs=runs, datasets=datasets,
-            classifiers=classifiers, result=result)
+            classifiers=classifiers, result=result,
+            class_for_ir_statistics=class_for_ir_statistics, attribute_id=attribute_id,
+            pred_target_column=pred_target_column)
 
         self.folds = folds
 
@@ -293,7 +316,7 @@ class SimpleRandomSplitExperiment(SimpleExperiment):
     """
 
     def __init__(self, datasets, classifiers, classification=True, runs=10, percentage=66.6, preserve_order=False,
-                 result=None):
+                 result=None, class_for_ir_statistics=0, attribute_id=-1, pred_target_column=False):
         """
         Initializes the experiment.
 
@@ -311,6 +334,12 @@ def __init__(self, datasets, classifiers, classification=True, runs=10, percenta
         :type classifiers: list
         :param result: the filename of the file to store the results in
         :type result: str
+        :param class_for_ir_statistics: the class label index to use for IR statistics (classification only)
+        :type class_for_ir_statistics: int
+        :param attribute_id: the 0-based index of the attribute identifying instances (classification only)
+        :type attribute_id: int
+        :param pred_target_column: whether to store the predicted and target columns as well (classification only)
+        :type pred_target_column: bool
         """
 
         if runs < 1:
@@ -328,7 +357,9 @@ def __init__(self, datasets, classifiers, classification=True, runs=10, percenta
 
         super(SimpleRandomSplitExperiment, self).__init__(
             classification=classification, runs=runs, datasets=datasets,
-            classifiers=classifiers, result=result)
+            classifiers=classifiers, result=result,
+            class_for_ir_statistics=class_for_ir_statistics, attribute_id=attribute_id,
+            pred_target_column=pred_target_column)
 
         self.percentage = percentage
         self.preserve_order = preserve_order