Commit 50f67d1

Merge pull request #17 from zStupan/main
Add stats example, fix linter errors
2 parents 363dd93 + a455701 commit 50f67d1

File tree

14 files changed: +202 -154 lines changed


examples/stats.py

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+"""
+Example usage of the Stats class. We perform association rule mining from the basic run example and then print
+out a statistical analysis of the mined association rules
+"""
+
+
+from niaarm import NiaARM
+from niaarm.dataset import Dataset
+from niaarm.stats import Stats
+from niapy.algorithms.basic import DifferentialEvolution
+from niapy.task import Task, OptimizationType
+
+
+if __name__ == '__main__':
+    # Load the dataset and run the algorithm
+    data = Dataset("datasets/Abalone.csv")
+    problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0)
+    task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)
+    algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)
+    algo.run(task=task)
+
+    # Instantiate Stats object and print basic statistics of mined rules.
+    stats = Stats(problem.rules)
+
+    print('\nSTATS:')
+    print(f'Total rules: {stats.total_rules}')
+    print(f'Average fitness: {stats.avg_fitness}')
+    print(f'Average support: {stats.avg_support}')
+    print(f'Average confidence: {stats.avg_confidence}')
+    print(f'Average coverage: {stats.avg_coverage}')
+    print(f'Average shrinkage: {stats.avg_shrinkage}')
+    print(f'Average length of antecedent: {stats.avg_ant_len}')
+    print(f'Average length of consequent: {stats.avg_con_len}')
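
For readers who want the same summary without the Stats helper, most of these averages can be computed directly from the mined rules. A minimal sketch, continuing from the example above and assuming only the Rule attributes used elsewhere in this commit (fitness, support, confidence, antecedent, consequent); the internals of the Stats class itself are not shown in this diff:

    rules = problem.rules
    n = len(rules)  # assumes at least one rule was mined
    print(f'Total rules: {n}')
    print(f'Average fitness: {sum(rule.fitness for rule in rules) / n}')
    print(f'Average support: {sum(rule.support for rule in rules) / n}')
    print(f'Average confidence: {sum(rule.confidence for rule in rules) / n}')
    print(f'Average length of antecedent: {sum(len(rule.antecedent) for rule in rules) / n}')
    print(f'Average length of consequent: {sum(len(rule.consequent) for rule in rules) / n}')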

niaarm/__init__.py

Lines changed: 5 additions & 0 deletions
@@ -1,3 +1,8 @@
 from niaarm.niaarm import NiaARM
+from niaarm.dataset import Dataset
+from niaarm.stats import Stats
+
+
+__all__ = [NiaARM, Dataset, Stats]
 
 __version__ = "0.1.0"
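
With Dataset and Stats re-exported from the package root, the separate submodule imports in the example above can be collapsed into one line. A small sketch of what this __init__.py change enables (dataset path borrowed from examples/stats.py; illustrative only):

    # Top-level imports made available by this commit's niaarm/__init__.py
    from niaarm import NiaARM, Dataset, Stats

    data = Dataset("datasets/Abalone.csv")
    problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0)
    stats = Stats(problem.rules)  # problem.rules stays empty until an optimizer has run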

niaarm/association_rule.py

Lines changed: 59 additions & 53 deletions
@@ -74,55 +74,60 @@ def feature_position(self, feature):
             position = position + 2
         return position
 
-    def support_confidence(self, antecedent, consequence, transactions):
+    def __match_antecedent(self, i, antecedent, transactions):
+        match = 0
+        for j in range(len(antecedent)):
+            dtype = self.features[self.permutation[j]].dtype
+            if dtype == 'float' or dtype == 'int':
+                if antecedent[j] != 'NO':
+                    border = antecedent[j]
+                    if border[0] <= transactions[i, self.permutation[j]] <= border[1]:
+                        match = match + 1
+            elif dtype == 'cat':
+                if antecedent[j] != 'NO':
+                    ant = antecedent[j]
+                    if transactions[i, self.permutation[j]] == ant[0]:
+                        match = match + 1
+        return match
+
+    def __match_consequent(self, i, antecedent, consequent, transactions):
+        match = 0
+        con_counter = 0
+        for ll in range(len(antecedent), len(antecedent) + len(consequent)):
+            dtype = self.features[self.permutation[ll]].dtype
+            if dtype == 'float' or dtype == 'int':
+                if consequent[con_counter] != 'NO':
+                    border = consequent[con_counter]
+                    if border[0] <= transactions[i, self.permutation[ll]] <= border[1]:
+                        match = match + 1
+            elif dtype == 'cat':
+                if consequent[con_counter] != 'NO':
+                    con = consequent[con_counter]
+
+                    if transactions[i, self.permutation[ll]] == con[0]:
+                        match = match + 1
+
+            con_counter = con_counter + 1
+        return match
+
+    def support_confidence(self, antecedent, consequent, transactions):
         supp = 0
         conf = 0
         conf_counter = 0
 
-        # firstly antecedent
         for i in range(len(transactions)):
-            match1 = 0
-            match2 = 0
-            for j in range(len(antecedent)):
-                dtype = self.features[self.permutation[j]].dtype
-                if dtype == 'float' or dtype == 'int':
-                    if antecedent[j] != 'NO':
-                        border = antecedent[j]
-                        if border[0] <= transactions[i, self.permutation[j]] <= border[1]:
-                            match1 = match1 + 1
-                elif dtype == 'cat':
-                    if antecedent[j] != 'NO':
-                        ant = antecedent[j]
-                        if transactions[i, self.permutation[j]] == ant[0]:
-                            match1 = match1 + 1
-
-            # secondly consequence
-            con_counter = 0
-            for ll in range(len(antecedent), len(antecedent) + len(consequence)):
-                dtype = self.features[self.permutation[ll]].dtype
-                if dtype == 'float' or dtype == 'int':
-                    if consequence[con_counter] != 'NO':
-                        border = consequence[con_counter]
-                        if border[0] <= transactions[i, self.permutation[ll]] <= border[1]:
-                            match2 = match2 + 1
-                elif dtype == 'cat':
-                    if consequence[con_counter] != 'NO':
-                        con = consequence[con_counter]
-
-                        if transactions[i, self.permutation[ll]] == con[0]:
-                            match2 = match2 + 1
-
-                con_counter = con_counter + 1
+            match_antecedent = self.__match_antecedent(i, antecedent, transactions)
+            match_consequent = self.__match_consequent(i, antecedent, consequent, transactions)
 
             missing_ant = antecedent.count('NO')
-            missing_con = consequence.count('NO')
+            missing_con = consequent.count('NO')
 
-            if (missing_ant + match1) == len(antecedent):
+            if (missing_ant + match_antecedent) == len(antecedent):
                 conf_counter += 1
-                if (missing_con + match2) == len(consequence):
+                if (missing_con + match_consequent) == len(consequent):
                     conf = conf + 1
 
-            total = match1 + match2 + missing_ant + missing_con
+            total = match_antecedent + match_consequent + missing_ant + missing_con
 
             if total == len(self.features):
                 supp = supp + 1
@@ -138,11 +143,11 @@ def support_confidence(self, antecedent, consequence, transactions):
 
         return total_supp, total_conf
 
-    def coverage(self, antecedent, consequence):
-        missing_total = antecedent.count("NO") + consequence.count("NO")
+    def coverage(self, antecedent, consequent):
+        missing_total = antecedent.count("NO") + consequent.count("NO")
         return 1 - missing_total / len(self.features)
 
-    def shrinkage(self, antecedent, consequence):
+    def shrinkage(self, antecedent, consequent):
        differences = []
 
         for i in range(len(antecedent)):
@@ -156,11 +161,11 @@ def shrinkage(self, antecedent, consequence):
                     differences.append(diff)
 
         con_counter = 0
-        for ll in range(len(antecedent), len(antecedent) + len(consequence)):
+        for ll in range(len(antecedent), len(antecedent) + len(consequent)):
             feature = self.features[self.permutation[ll]]
             if feature.dtype == 'float' or feature.dtype == 'int':
-                if consequence[con_counter] != 'NO':
-                    borders = consequence[con_counter]
+                if consequent[con_counter] != 'NO':
+                    borders = consequent[con_counter]
                     diff_borders = borders[1] - borders[0]
                     total_borders = feature.max_val - feature.min_val
                     diff = diff_borders / total_borders
@@ -175,9 +180,9 @@ def shrinkage(self, antecedent, consequence):
             return 0.0
         return 1 - normalized
 
-    def format_rules(self, antecedent, consequence):
+    def format_rules(self, antecedent, consequent):
         antecedent1 = []
-        consequence1 = []
+        consequent1 = []
 
         for i in range(len(antecedent)):
             if antecedent[i] != "NO":
@@ -188,19 +193,20 @@ def format_rules(self, antecedent, consequence):
                     rule = feature.name + "(" + str(antecedent[i]) + ")"
                 antecedent1.append(rule)
 
-        for i in range(len(consequence)):
-            if consequence[i] != "NO":
+        for i in range(len(consequent)):
+            if consequent[i] != "NO":
                 feature = self.features[self.permutation[i + len(antecedent)]]
                 if feature.dtype == "cat":
-                    rule = feature.name + "(" + str(consequence[i][0]) + ")"
+                    rule = feature.name + "(" + str(consequent[i][0]) + ")"
                 else:
-                    rule = feature.name + "(" + str(consequence[i]) + ")"
-                consequence1.append(rule)
-        return antecedent1, consequence1
+                    rule = feature.name + "(" + str(consequent[i]) + ")"
+                consequent1.append(rule)
+        return antecedent1, consequent1
 
 
 def _normalize(value, actual_bounds, real_bounds):
-    return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (actual_bounds[1] - actual_bounds[0])
+    return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (
+            actual_bounds[1] - actual_bounds[0])
 
 
 def _rule_feasible(ant, con):
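
The extracted __match_antecedent and __match_consequent helpers apply the same per-attribute test: a numeric attribute matches a transaction when its value lies inside the rule's [lower, upper] border, a categorical attribute matches when the value equals the selected category, and 'NO' marks an attribute the rule does not use. A standalone sketch of that test, simplified for illustration (no permutation vector or Feature objects; not part of the library API):

    # Restatement of the matching rule used by __match_antecedent / __match_consequent.
    def matches(dtype, selector, value):
        if selector == 'NO':                    # attribute not used by the rule
            return False
        if dtype == 'float' or dtype == 'int':  # numeric: value inside the border interval
            low, high = selector[0], selector[1]
            return low <= value <= high
        return value == selector[0]             # categorical: value equals the chosen category

    print(matches('float', (0.3, 0.7), 0.5))  # True
    print(matches('cat', ['M'], 'F'))         # False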

niaarm/dataset.py

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ def __analyse_types(self):
                 unique_categories = None
             else:
                 dtype = "cat"
-                unique_categories = sorted(col.astype('string').unique().tolist(), key=str.lower)  # convert to str just in case
+                unique_categories = sorted(col.astype('string').unique().tolist(), key=str.lower)
                 min_value = None
                 max_value = None

niaarm/niaarm.py

Lines changed: 47 additions & 47 deletions
@@ -8,20 +8,23 @@
 class NiaARM(Problem):
     r"""Implementation of NiaARM.
 
-    Date:
-        2021
-
     Reference:
         The implementation is composed of ideas found in the following papers:
 
-        I. Fister Jr., A. Iglesias, A. Gálvez, J. Del Ser, E. Osaba, I Fister. [Differential evolution for association rule mining using categorical and numerical attributes](http://www.iztok-jr-fister.eu/static/publications/231.pdf) In: Intelligent data engineering and automated learning - IDEAL 2018, pp. 79-88, 2018.
-
-        I. Fister Jr., V. Podgorelec, I. Fister. Improved Nature-Inspired Algorithms for Numeric Association Rule Mining. In: Vasant P., Zelinka I., Weber GW. (eds) Intelligent Computing and Optimization. ICO 2020. Advances in Intelligent Systems and Computing, vol 1324. Springer, Cham.
+        I. Fister Jr., A. Iglesias, A. Gálvez, J. Del Ser, E. Osaba, I Fister.
+        [Differential evolution for association rule mining using categorical and numerical attributes]
+        (http://www.iztok-jr-fister.eu/static/publications/231.pdf)
+        In: Intelligent data engineering and automated learning - IDEAL 2018, pp. 79-88, 2018.
 
-    License:
-        MIT
+        I. Fister Jr., V. Podgorelec, I. Fister.
+        Improved Nature-Inspired Algorithms for Numeric Association Rule Mining.
+        In: Vasant P., Zelinka I., Weber GW. (eds) Intelligent Computing and Optimization. ICO 2020.
+        Advances in Intelligent Systems and Computing, vol 1324. Springer, Cham.
 
     Attributes:
+        features (list[Feature]): List of features.
+        transactions (np.ndarray): Data from transaction database.
+        rules (list[Rule]): Mined association rules.
 
     """
 
@@ -42,30 +45,26 @@ def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma
         self.rules = []
         super().__init__(dimension, 0.0, 1.0)
 
-    def rule_exists(self, antecedent, consequence):
+    def rule_exists(self, antecedent, consequent):
         r"""Check if association rule already exists."""
         for rule in self.rules:
-            if rule.antecedent == antecedent and rule.consequence == consequence:
+            if rule.antecedent == antecedent and rule.consequent == consequent:
                 return True
         return False
 
     def export_rules(self, path):
         r"""Save all association rules found to csv file."""
-        try:
-            with open(path, 'w', newline='') as f:
-                writer = csv.writer(f)
-
-                # write header
-                writer.writerow(["Antecedent", "Consequence", "Fitness", "Support", "Confidence", "Coverage", "Shrinkage"])
-
-                for rule in self.rules:
-                    writer.writerow(
-                        [rule.antecedent, rule.consequence, rule.fitness, rule.support, rule.confidence, rule.coverage,
-                         rule.shrink])
-        except OSError:
-            print('OSError:', path)
-        else:
-            print("Output successfully")
+        with open(path, 'w', newline='') as f:
+            writer = csv.writer(f)
+
+            # write header
+            writer.writerow(["Antecedent", "consequent", "Fitness", "Support", "Confidence", "Coverage", "Shrinkage"])
+
+            for rule in self.rules:
+                writer.writerow(
+                    [rule.antecedent, rule.consequent, rule.fitness, rule.support, rule.confidence, rule.coverage,
+                     rule.shrink])
+        print(f"Rules exported to {path}")
 
     def sort_rules(self):
         self.rules.sort(key=lambda x: x.fitness, reverse=True)
@@ -81,70 +80,71 @@ def _evaluate(self, sol):
 
         rule = arm.build_rule(solution)
 
-        # get antecedent and consequence of rule
+        # get antecedent and consequent of rule
         antecedent = rule[:cut]
-        consequence = rule[cut:]
+        consequent = rule[cut:]
 
         # check if rule is feasible
-        if _rule_feasible(antecedent, consequence):
+        if _rule_feasible(antecedent, consequent):
             # get support and confidence of rule
-            support, confidence = arm.support_confidence(antecedent, consequence, self.transactions)
+            support, confidence = arm.support_confidence(antecedent, consequent, self.transactions)
 
             if self.gamma == 0.0:
                 shrinkage = 0
             else:
-                shrinkage = arm.shrinkage(antecedent, consequence)
+                shrinkage = arm.shrinkage(antecedent, consequent)
 
             if self.delta == 0.0:
                 coverage = 0
             else:
-                coverage = arm.coverage(antecedent, consequence)
+                coverage = arm.coverage(antecedent, consequent)
 
             fitness = ((self.alpha * support) + (self.beta * confidence) + (self.gamma * shrinkage) +
                        (self.delta * coverage)) / (self.alpha + self.beta + self.gamma + self.delta)
 
-            # in case no attributes were selected for antecedent or consequence
-            if antecedent.count("NO") == len(antecedent) or consequence.count("NO") == len(consequence):
+            # in case no attributes were selected for antecedent or consequent
+            if antecedent.count("NO") == len(antecedent) or consequent.count("NO") == len(consequent):
                 fitness = 0.0
 
             if support > 0.0 and confidence > 0.0:
-                antecedent, consequence = _fix_border(antecedent, consequence)
+                antecedent, consequent = _fix_border(antecedent, consequent)
                 # format rule; remove NO; add name of features
-                antecedent1, consequence1 = arm.format_rules(antecedent, consequence)
+                antecedent1, consequent1 = arm.format_rules(antecedent, consequent)
 
                 # save feasible rule
-                if not self.rule_exists(antecedent1, consequence1):
-                    self.rules.append(Rule(antecedent1, consequence1, fitness, support, confidence, coverage, shrinkage))
+                if not self.rule_exists(antecedent1, consequent1):
+                    self.rules.append(
+                        Rule(antecedent1, consequent1, fitness, support, confidence, coverage, shrinkage))
 
                 if fitness > self.best_fitness:
                     self.best_fitness = fitness
-                    print(f'Fitness: {fitness}, Support: {support}, Confidence:{confidence}, Coverage:{coverage}, Shrinkage:{shrinkage}')
+                    print(f'Fitness: {fitness}, Support: {support}, Confidence:{confidence}, Coverage:{coverage}, '
+                          f'Shrinkage:{shrinkage}')
             return fitness
         else:
             return -1.0
 
 
-def _fix_border(antecedent, consequence):
+def _fix_border(antecedent, consequent):
     r"""In case lower and upper bounds of interval are the same.
     We need this in order to provide clean output.
 
     Arguments:
         antecedent (np.ndarray): .
-        consequence (np.ndarray): .
+        consequent (np.ndarray): .
 
     Returns:
         antecedent (array):
-        consequence (array):
+        consequent (array):
     """
-
     for i in range(len(antecedent)):
         if len(antecedent[i]) > 1:
             if antecedent[i][0] == antecedent[i][1]:
                 antecedent[i] = antecedent[i][0]
 
-    for i in range(len(consequence)):
-        if len(consequence[i]) > 1:
-            if consequence[i][0] == consequence[i][1]:
-                consequence[i] = consequence[i][0]
+    for i in range(len(consequent)):
+        if len(consequent[i]) > 1:
+            if consequent[i][0] == consequent[i][1]:
+                consequent[i] = consequent[i][0]
 
-    return antecedent, consequence
+    return antecedent, consequent
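
_evaluate folds the four measures into one weighted score, fitness = (alpha * support + beta * confidence + gamma * shrinkage + delta * coverage) / (alpha + beta + gamma + delta), so with the weights used in examples/stats.py (alpha=1.0, beta=1.0, gamma and delta left at 0) the fitness is simply the mean of support and confidence. A small illustrative sketch of just that weighting, pulled out of the Problem class (the default weights below mirror the example, not the NiaARM constructor):

    def rule_fitness(support, confidence, shrinkage, coverage,
                     alpha=1.0, beta=1.0, gamma=0.0, delta=0.0):
        # Same weighting as in NiaARM._evaluate
        return ((alpha * support) + (beta * confidence) +
                (gamma * shrinkage) + (delta * coverage)) / (alpha + beta + gamma + delta)

    print(rule_fitness(support=0.4, confidence=0.8, shrinkage=0.0, coverage=0.0))  # 0.6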
