Merge pull request #105 from zStupan/feature-zhang

zStupan · web-flow · commit afa52315f54b · 2024-01-14T23:47:30.000+01:00
Implemented Zhang's Metric
diff --git a/docs/getting_started.rst b/docs/getting_started.rst
@@ -334,6 +334,7 @@ The framework currently implements the following interest measures (metrics):
 - Comprehensibility
 - Netconf [#fn]_
 - Yule's Q [#fn]_
+- Zhang's Metric [#fn]_
 
 More information about these interest measures can be found in the API reference
 of the :class:`~niaarm.rule.Rule` class.
diff --git a/interest_measures.md b/interest_measures.md
@@ -192,3 +192,19 @@ and 0 reflects independence)
 **Reference:** E. V. Altay and B. Alatas, "Sensitivity Analysis of MODENAR Method for Mining of Numeric Association
 Rules," 2019 1st International Informatics and Software Engineering Conference (UBMYK), 2019, pp. 1-6,
 doi: 10.1109/UBMYK48245.2019.8965539.
+
+# Zhang's Metric
+
+Zhang's metric measures the strength of association (positive or negative) between the antecedent and consequent,
+taking into account both their co-occurrence and non-co-occurrence.
+
+```math
+zhang(X \implies Y) =
+\frac{conf(X \implies Y) - conf(\neg X \implies Y)}{max\{conf(X \implies Y), conf(\neg X \implies Y)\}}
+```
+
+**Range:** $`[-1, 1]`$ (-1 reflects total negative association, 1 reflects perfect positive association
+and 0 reflects independence)
+
+**Reference:** T. Zhang, “Association Rules,” in Knowledge Discovery and Data Mining. Current Issues and New 
+Applications, 2000, pp. 245–256. doi: 10.1007/3-540-45571-X_31. 
diff --git a/niaarm/rule.py b/niaarm/rule.py
@@ -145,6 +145,16 @@ class Rule:
          **Reference:** E. V. Altay and B. Alatas, "Sensitivity Analysis of MODENAR Method for Mining of Numeric Association
          Rules," 2019 1st International Informatics and Software Engineering Conference (UBMYK), 2019, pp. 1-6,
          doi: 10.1109/UBMYK48245.2019.8965539.
+        zhang: Zhang's metric measures the strength of association (positive or negative) between the antecedent and consequent, taking into account both their co-occurrence and non-co-occurrence.
+
+         :math:`zhang(X \implies Y) =
+         \frac{conf(X \implies Y) - conf(\neg X \implies Y)}{max\{conf(X \implies Y), conf(\neg X \implies Y)\}}`
+
+         **Range:** :math:`[-1, 1]` (-1 reflects total negative association, 1 reflects perfect positive association
+         and 0 reflects independence)
+
+         **Reference:** T. Zhang, “Association Rules,” in Knowledge Discovery and Data Mining. Current Issues and New
+         Applications, 2000, pp. 245–256. doi: 10.1007/3-540-45571-X_31.
 
     """
 
@@ -176,6 +186,7 @@ class Rule:
         "comprehensibility",
         "netconf",
         "yulesq",
+        "zhang",
     )
 
     def __init__(self, antecedent, consequent, fitness=0.0, transactions=None):
@@ -304,6 +315,17 @@ def comprehensibility(self):
             1 + len(self.antecedent) + len(self.consequent)
         )
 
+    @property
+    def zhang(self):
+        support_x = self.coverage
+        support_y = self.rhs_support
+        support = self.support
+
+        numerator = support - support_x * support_y
+        denominator = max(support * (1 - support_x), support_x * (support_y - support))
+
+        return numerator / denominator
+
     def __eq__(self, other):
         return (
             self.antecedent == other.antecedent and self.consequent == other.consequent
diff --git a/niaarm/rule_list.py b/niaarm/rule_list.py
@@ -122,6 +122,7 @@ def __str__(self):
             f'Average comprehensibility: {self.mean("comprehensibility")}\n'
             f'Average netconf: {self.mean("netconf")}\n'
             f'Average Yule\'s Q: {self.mean("yulesq")}\n'
+            f'Average Zhang\'s Metric: {self.mean("zhang")}\n'
             f"Average antecedent length: {sum(len(rule.antecedent) for rule in self) / len(self)}\n"
             f"Average consequent length: {sum(len(rule.consequent) for rule in self) / len(self)}\n"
         )
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -66,3 +66,7 @@ def test_netconf(self):
     def test_yulesq(self):
         self.assertAlmostEqual(self.rule_one.yulesq, (6 - 1) / (6 + 1))
         self.assertAlmostEqual(self.rule_two.yulesq, (6 - 1) / (6 + 1))
+
+    def test_zhang(self):
+        self.assertAlmostEqual(self.rule_one.zhang, 5 / 9)
+        self.assertAlmostEqual(self.rule_two.zhang, 5 / 8)

Original file line number	Diff line number	Diff line change
`@@ -122,6 +122,7 @@ def __str__(self):`
`122`	`122`	`f'Average comprehensibility: {self.mean("comprehensibility")}\n'`
`123`	`123`	`f'Average netconf: {self.mean("netconf")}\n'`
`124`	`124`	`f'Average Yule\'s Q: {self.mean("yulesq")}\n'`
	`125`	`+ f'Average Zhang\'s Metric: {self.mean("zhang")}\n'`
`125`	`126`	`f"Average antecedent length: {sum(len(rule.antecedent) for rule in self) / len(self)}\n"`
`126`	`127`	`f"Average consequent length: {sum(len(rule.consequent) for rule in self) / len(self)}\n"`
`127`	`128`	`)`