Merge pull request #43 from zStupan/main

zStupan · web-flow · commit 671bfd180e31 · 2022-04-06T09:36:41.000+02:00
Updated RuleList
diff --git a/README.md b/README.md
@@ -15,7 +15,7 @@
 [![Average time to resolve an issue](http://isitmaintained.com/badge/resolution/firefly-cpp/niaarm.svg)](http://isitmaintained.com/project/firefly-cpp/niaarm "Average time to resolve an issue")
 
 ## General outline of the framework
-NiaARM is a framework for Association Rule Mining based on nature-inspired algorithms for optimization. The framework is written fully in Python and runs on all platforms. NiaARM allows users to preprocess the data in a transaction database automatically, to search for association rules and provide a pretty output of the rules found. This framework also supports numerical and real-valued types of attributes besides the categorical ones. Mining the association rules is defined as an optimization problem, and solved using the nature-inspired algorithms that come from the related framework called [NiaPy](https://github.com/NiaOrg/NiaPy).
+NiaARM is a framework for Association Rule Mining based on nature-inspired algorithms for optimization. The framework is written fully in Python and runs on all platforms. NiaARM allows users to preprocess the data in a transaction database automatically, to search for association rules and provide a pretty output of the rules found. This framework also supports integer and real-valued types of attributes besides the categorical ones. Mining the association rules is defined as an optimization problem, and solved using the nature-inspired algorithms that come from the related framework called [NiaPy](https://github.com/NiaOrg/NiaPy).
 
 ## Detailed insights
 The current version includes (but is not limited to) the following functions:
@@ -44,19 +44,63 @@ $ apk add py3-niaarm
 
 ## Usage
 
-### Basic example
+### Loading data
 
-In this example we'll use Differential Evolution to mine association rules on the Abalone Dataset.
+In NiaARM, data loading is done via the `Dataset` class. There are two options for loading data:
+
+#### Option 1: From a pandas DataFrame (recommended)
+
+```python
+import pandas as pd
+from niaarm import Dataset
+
+
+df = pd.read_csv('datasets/Abalone.csv')
+# preprocess data...
+data = Dataset(df)
+print(data) # printing the dataset will generate a feature report
+```
+
+#### Option 2: From a CSV file directly
+
+```python
+from niaarm import Dataset
+
+
+data = Dataset('datasets/Abalone.csv')
+print(data)
+```
+
+### Mining association rules the easy way (recommended)
+
+Association rule mining can be easily performed using the `get_rules` function:
+
+```python
+
+from niaarm import get_rules
+from niapy.algorithms.basic import DifferentialEvolution
+
+algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)
+metrics = ('support', 'confidence')
+
+rules, run_time = get_rules(data, algo, metrics, max_iters=30, logging=True)
+
+print(rules) # Prints basic stats about the mined rules
+print(f'Run Time: {run_time}')
+rules.to_csv('output.csv')
+```
+
+### Mining association rules the hard way
+
+The above example can also be implemented using a more low-level interface,
+with the `NiaARM` class directly:
 
 ```python
 from niaarm import NiaARM, Dataset
 from niapy.algorithms.basic import DifferentialEvolution
 from niapy.task import Task, OptimizationType
 
 
-# load and preprocess the dataset from csv
-data = Dataset("datasets/Abalone.csv")
-
 # Create a problem:::
 # dimension represents the dimension of the problem;
 # features represent the list of features, while transactions depicts the list of transactions
@@ -82,29 +126,8 @@ problem.rules.sort()
 problem.rules.to_csv('output.csv')
 ```
 
-#### Simplified
-
-The above example can be further simplified with the use of ``niaarm.mine.get_rules()``:
-
-```python
-
-from niaarm import Dataset, get_rules
-from niapy.algorithms.basic import DifferentialEvolution
-
-
-data = Dataset("datasets/Abalone.csv")
-algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)
-metrics = ('support', 'confidence')
-
-rules, run_time = get_rules(data, algo, metrics, max_iters=30, logging=True)
-
-print(rules)
-print(f'Run Time: {run_time}')
-rules.to_csv('output.csv')
-
-```
-
-For a full list of examples see the [examples folder](examples/).
+For a full list of examples see the [examples folder](https://github.com/firefly-cpp/NiaARM/tree/main/examples)
+in the GitHub repository.
 
 ### Command line interface
 
diff --git a/examples/stats.py b/examples/stats.py
diff --git a/examples/working_with_rule_list.py b/examples/working_with_rule_list.py
@@ -0,0 +1,30 @@
+"""
+Example usage of the RuleList class. The RuleList class is a wrapper around a python list, with some added features, mainly
+getting statistical data of rule metrics and sorting by metric.
+"""
+
+
+from niaarm import NiaARM, Dataset
+from niapy.algorithms.basic import DifferentialEvolution
+from niapy.task import Task, OptimizationType
+
+
+if __name__ == '__main__':
+    # Load the dataset and run the algorithm
+    data = Dataset("datasets/Abalone.csv")
+    problem = NiaARM(data.dimension, data.features, data.transactions, metrics=('support', 'confidence'))
+    task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)
+    algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)
+    algo.run(task=task)
+
+    # print the RuleList to get basic data about the mined rules.
+    print(problem.rules)
+
+    # RuleList also provides methods for getting the min, max, mean and std. dev. of metrics:
+    print('Min support', problem.rules.min('support'))
+    print('Max support', problem.rules.max('support'))
+    print('Mean support', problem.rules.mean('support'))
+    print('Std support', problem.rules.std('support'))
+
+    # you can also use RuleList.get to get all values of a metric as a numpy array:
+    print(problem.rules.get('support'))
diff --git a/niaarm/rule_list.py b/niaarm/rule_list.py
@@ -5,33 +5,26 @@
 
 
 class RuleList(UserList):
-    """A wrapper around a list of rules.
-
-    Attributes:
-        mean_fitness (float): Mean fitness.
-        mean_support (float): Mean support.
-        mean_confidence (float): Mean confidence.
-        mean_lift (float): Mean lift.
-        mean_coverage (float): Mean coverage.
-        mean_rhs_support (float): Mean consequent support.
-        mean_conviction (float): Mean conviction.
-        mean_inclusion (float): Mean inclusion.
-        mean_amplitude (float): Mean amplitude.
-        mean_interestingness (float): Mean interestingness.
-        mean_comprehensibility (float): Mean comprehensibility.
-        mean_netconf (float): Mean netconf.
-        mean_yulesq (float): Mean Yule's Q.
-        mean_antecedent_length (float): Mean antecedent length.
-        mean_consequent_length (float): Mean consequent length.
-
-    """
+    """A list of rules."""
+
+    def get(self, metric):
+        """Get values of `metric` for each rule as a numpy array.
+
+        Args:
+            metric (str): Metric.
+
+        Returns:
+            numpy.ndarray: Array of `metric` for all rules.
+
+        """
+        return np.array([getattr(rule, metric) for rule in self.data])
 
     def sort(self, by='fitness', reverse=True):
         """Sort rules by metric.
 
         Args:
             by (str): Metric to sort rules by. Default: ``'fitness'``.
-            reverse (bool): Sort in descending order. Default: ``True``
+            reverse (bool): Sort in descending order. Default: ``True``.
 
         """
         self.data.sort(key=lambda rule: getattr(rule, by), reverse=reverse)
@@ -46,7 +39,7 @@ def mean(self, metric):
             float: Mean value of metric in rule list.
 
         """
-        return np.mean([getattr(rule, metric) for rule in self.data])
+        return sum(getattr(rule, metric) for rule in self.data) / len(self.data)
 
     def min(self, metric):
         """Get min value of metric.
@@ -97,87 +90,25 @@ def to_csv(self, filename):
             # write header
             writer.writerow(("antecedent", "consequent", "fitness") + Rule.metrics)
 
-            for rule in self:
+            for rule in self.data:
                 writer.writerow(
                     [rule.antecedent, rule.consequent, rule.fitness] + [getattr(rule, metric) for metric in Rule.metrics])
         print(f"Rules exported to {filename}")
 
-    @property
-    def mean_fitness(self):
-        return np.mean([rule.fitness for rule in self.data])
-
-    @property
-    def mean_support(self):
-        return np.mean([rule.support for rule in self.data])
-
-    @property
-    def mean_confidence(self):
-        return np.mean([rule.confidence for rule in self.data])
-
-    @property
-    def mean_lift(self):
-        return np.mean([rule.lift for rule in self.data])
-
-    @property
-    def mean_coverage(self):
-        return np.mean([rule.coverage for rule in self.data])
-
-    @property
-    def mean_rhs_support(self):
-        return np.mean([rule.rhs_support for rule in self.data])
-
-    @property
-    def mean_conviction(self):
-        return np.mean([rule.conviction for rule in self.data])
-
-    @property
-    def mean_inclusion(self):
-        return np.mean([rule.inclusion for rule in self.data])
-
-    @property
-    def mean_amplitude(self):
-        return np.mean([rule.amplitude for rule in self.data])
-
-    @property
-    def mean_interestingness(self):
-        return np.mean([rule.interestingness for rule in self.data])
-
-    @property
-    def mean_comprehensibility(self):
-        return np.mean([rule.comprehensibility for rule in self.data])
-
-    @property
-    def mean_netconf(self):
-        return np.mean([rule.netconf for rule in self.data])
-
-    @property
-    def mean_yulesq(self):
-        return np.mean([rule.yulesq for rule in self.data])
-
-    @property
-    def mean_antecedent_length(self):
-        return np.mean([len(rule.antecedent) for rule in self.data])
-
-    @property
-    def mean_consequent_length(self):
-        return np.mean([len(rule.consequent) for rule in self.data])
-
     def __str__(self):
         string = f'STATS:\n' \
                  f'Total rules: {len(self)}\n' \
-                 f'Average fitness: {self.mean_fitness}\n' \
-                 f'Average support: {self.mean_support}\n' \
-                 f'Average confidence: {self.mean_confidence}\n' \
-                 f'Average lift: {self.mean_lift}\n' \
-                 f'Average coverage: {self.mean_coverage}\n' \
-                 f'Average consequent support: {self.mean_rhs_support}\n' \
-                 f'Average conviction: {self.mean_conviction}\n' \
-                 f'Average amplitude: {self.mean_amplitude}\n' \
-                 f'Average inclusion: {self.mean_inclusion}\n' \
-                 f'Average interestingness: {self.mean_interestingness}\n' \
-                 f'Average comprehensibility: {self.mean_comprehensibility}\n' \
-                 f'Average netconf: {self.mean_netconf}\n' \
-                 f'Average Yule\'s Q: {self.mean_yulesq}\n' \
-                 f'Average length of antecedent: {self.mean_antecedent_length}\n' \
-                 f'Average length of consequent: {self.mean_consequent_length}'
+                 f'Average fitness: {self.mean("fitness")}\n' \
+                 f'Average support: {self.mean("support")}\n' \
+                 f'Average confidence: {self.mean("confidence")}\n' \
+                 f'Average lift: {self.mean("lift")}\n' \
+                 f'Average coverage: {self.mean("coverage")}\n' \
+                 f'Average consequent support: {self.mean("rhs_support")}\n' \
+                 f'Average conviction: {self.mean("conviction")}\n' \
+                 f'Average amplitude: {self.mean("amplitude")}\n' \
+                 f'Average inclusion: {self.mean("inclusion")}\n' \
+                 f'Average interestingness: {self.mean("interestingness")}\n' \
+                 f'Average comprehensibility: {self.mean("comprehensibility")}\n' \
+                 f'Average netconf: {self.mean("netconf")}\n' \
+                 f'Average Yule\'s Q: {self.mean("yulesq")}\n'
         return string