Skip to content

Commit c6a7cf7

Browse files
authored
Merge pull request #18 from zStupan/feature-cli
CLI
2 parents f3a5c89 + 27f1cd8 commit c6a7cf7

File tree

5 files changed

+267
-5
lines changed

5 files changed

+267
-5
lines changed

README.md

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
NiaARM is a framework for Association Rule Mining based on nature-inspired algorithms for optimization. The framework is written fully in Python and runs on all platforms. NiaARM allows users to preprocess the data in a transaction database automatically, to search for association rules and provide a pretty output of the rules found. This framework also supports numerical and real-valued types of attributes besides the categorical ones. Mining the association rules is defined as an optimization problem, and solved using the nature-inspired algorithms that come from the related framework called [NiaPy](https://github.com/NiaOrg/NiaPy).
1919

2020
## Detailed insights
21-
The current version witholds (but is not limited to) the following functions:
21+
The current version includes (but is not limited to) the following functions:
2222

2323
- loading datasets in CSV format,
2424
- preprocessing of data,
@@ -36,9 +36,76 @@ Install NiaARM with pip3:
3636
pip3 install niaarm
3737
```
3838

39-
## Examples
39+
## Usage
4040

41-
For a list of examples see the [examples folder](examples/).
41+
### Basic example
42+
```python
43+
from niaarm import NiaARM, Dataset
44+
from niapy.algorithms.basic import DifferentialEvolution
45+
from niapy.task import Task, OptimizationType
46+
47+
48+
# load and preprocess the dataset from csv
49+
data = Dataset("datasets/Abalone.csv")
50+
51+
# Create a problem:
52+
# dimension represents the dimension of the problem;
53+
# features represent the list of features, while transactions depicts the list of transactions
54+
# the following 4 elements represent weights (support, confidence, coverage, shrinkage)
55+
# the weights default to 0.0 and must not sum to zero (NiaARM raises a ValueError otherwise)
56+
problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0)
57+
58+
# build niapy task
59+
task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)
60+
61+
# use Differential Evolution (DE) algorithm from the NiaPy library
62+
# see full list of available algorithms: https://github.com/NiaOrg/NiaPy/blob/master/Algorithms.md
63+
algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)
64+
65+
# run algorithm
66+
best = algo.run(task=task)
67+
68+
# sort rules
69+
problem.sort_rules()
70+
71+
# export all rules to csv
72+
problem.export_rules('output.csv')
73+
```
74+
For a full list of examples see the [examples folder](examples/).
75+
76+
### Command line interface
77+
78+
```
79+
niaarm -h
80+
usage: niaarm [-h] -i INPUT_FILE [-o OUTPUT_FILE] -a ALGORITHM [-s SEED]
81+
[--max-evals MAX_EVALS] [--max-iters MAX_ITERS] [--alpha ALPHA]
82+
[--beta BETA] [--gamma GAMMA] [--delta DELTA] [--log]
83+
[--show-stats]
84+
85+
Perform ARM, output mined rules as csv, get mined rules' statistics
86+
87+
options:
88+
-h, --help show this help message and exit
89+
-i INPUT_FILE, --input-file INPUT_FILE
90+
Input file containing a csv dataset
91+
-o OUTPUT_FILE, --output-file OUTPUT_FILE
92+
Output file for mined rules
93+
-a ALGORITHM, --algorithm ALGORITHM
94+
Algorithm to use (niapy class name, e.g.
95+
DifferentialEvolution)
96+
-s SEED, --seed SEED Seed for the algorithm's random number generator
97+
--max-evals MAX_EVALS
98+
Maximum number of fitness function evaluations
99+
--max-iters MAX_ITERS
100+
Maximum number of iterations
101+
--alpha ALPHA Alpha parameter. Default 0
102+
--beta BETA Beta parameter. Default 0
103+
--gamma GAMMA Gamma parameter. Default 0
104+
--delta DELTA Delta parameter. Default 0
105+
--log Enable logging of fitness improvements
106+
--show-stats Display stats about mined rules
107+
```
108+
Note: The CLI script can also be run as a Python module (`python -m niaarm ...`)
42109

43110
## Reference Papers:
44111

niaarm/__main__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import sys
2+
from niaarm import cli
3+
4+
5+
if __name__ == '__main__':
    # Hand the value returned by the CLI entry point to the interpreter as
    # the process exit status.
    exit_status = cli.main()
    sys.exit(exit_status)

niaarm/cli.py

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
import argparse
2+
from inspect import getmodule, getmembers, isfunction
3+
import os
4+
from pathlib import Path
5+
import platform
6+
import subprocess
7+
import sys
8+
import tempfile
9+
10+
import numpy as np
11+
from niaarm import NiaARM, Dataset, Stats
12+
from niapy.task import OptimizationType, Task
13+
from niapy.util.factory import get_algorithm
14+
from niapy.util import distances, repair
15+
from niapy.algorithms.other import mts
16+
from niapy.algorithms.basic import de
17+
18+
19+
def get_parser():
    """Build the argument parser for the ``niaarm`` command line interface.

    Returns:
        argparse.ArgumentParser: Parser with the input/output file, algorithm,
        seed, stopping criteria, weight and reporting options registered.
    """
    cli_parser = argparse.ArgumentParser(
        prog='niaarm',
        description="Perform ARM, output mined rules as csv, get mined rules' statistics",
    )
    add = cli_parser.add_argument

    # Dataset, output destination and optimizer selection.
    add('-i', '--input-file', type=str, required=True, help='Input file containing a csv dataset')
    add('-o', '--output-file', type=str, help='Output file for mined rules')
    add('-a', '--algorithm', type=str, required=True,
        help='Algorithm to use (niapy class name, e.g. DifferentialEvolution)')
    add('-s', '--seed', type=int, help="Seed for the algorithm's random number generator")

    # Stopping criteria; np.inf is the "not given" value.
    add('--max-evals', type=int, default=np.inf, help='Maximum number of fitness function evaluations')
    add('--max-iters', type=int, default=np.inf, help='Maximum number of iterations')

    # The four weight options share type, default and help wording.
    for weight in ('alpha', 'beta', 'gamma', 'delta'):
        add(f'--{weight}', type=float, default=0.0, help=f'{weight.capitalize()} parameter. Default 0')

    # Reporting switches.
    add('--log', action='store_true', help='Enable logging of fitness improvements')
    add('--show-stats', action='store_true', help='Display stats about mined rules')

    return cli_parser
37+
38+
39+
def text_editor():
    """Return the command used to launch the user's text editor.

    The ``VISUAL`` environment variable is consulted first, then ``EDITOR``;
    an unset or empty variable is skipped. When neither is usable the
    fallback is ``notepad`` on Windows and ``vi`` everywhere else.
    """
    for variable in ('VISUAL', 'EDITOR'):
        editor = os.getenv(variable)
        if editor:
            return editor
    if platform.system() == 'Windows':
        return 'notepad'
    return 'vi'
41+
42+
43+
def parameters_string(parameters):
    """Render algorithm parameters as editable ``name = value`` text.

    The text starts with three ``#`` comment lines of instructions, followed
    by one line per parameter. Callables are written by their ``__name__``.
    Tuples are written as a comma-separated list of their elements, each
    element rendered individually, so empty tuples and tuples mixing
    callables with plain values no longer raise.

    Args:
        parameters (dict): Mapping of parameter name to parameter value.

    Returns:
        str: The rendered text; every line ends with a newline.
    """
    def render(item):
        # Functions are referred to by name so they can be typed back in.
        return item.__name__ if callable(item) else str(item)

    lines = [
        "# You can edit the algorithm's parameter values here",
        '# Save and exit to continue',
        '# WARNING: Do not edit parameter names',
    ]
    for parameter, value in parameters.items():
        if isinstance(value, tuple):
            value = ', '.join(render(item) for item in value)
        lines.append(f'{parameter} = {value.__name__ if callable(value) else value}')
    return '\n'.join(lines) + '\n'
56+
57+
58+
def functions(algorithm):
    """Collect the functions a parameter value may refer to by name.

    Functions are gathered from the module that defines the algorithm and
    from the ``repair``, ``distances``, ``de`` and ``mts`` modules. Modules
    are merged in that order, so on a name clash the later module wins.

    Args:
        algorithm: An algorithm class or an algorithm instance.

    Returns:
        dict: Mapping of function name to function object.
    """
    # Accept a class as well as an instance. Taking ``__class__`` of a class
    # yields its metaclass, whose module is not the algorithm's module.
    algorithm_class = algorithm if isinstance(algorithm, type) else algorithm.__class__

    funcs = {}
    for module in (getmodule(algorithm_class), repair, distances, de, mts):
        funcs.update(getmembers(module, isfunction))
    return funcs
71+
72+
73+
def find_function(name, algorithm):
    """Look up a function by name among those available for ``algorithm``.

    Args:
        name (str): Name of the function to find.
        algorithm: Algorithm whose available functions are searched.

    Returns:
        The function registered under ``name``.

    Raises:
        KeyError: If no function with that name is available.
    """
    available = functions(algorithm)
    return available[name]
75+
76+
77+
def convert_string(string):
    """Convert text to a number when possible, otherwise return it unchanged.

    Whole-number values (``'3'``, ``'3.0'``, ``'1e3'``) become ``int``, other
    numeric values become ``float``, and anything ``float()`` rejects is
    returned as is.

    Args:
        string (str): Text to convert.

    Returns:
        int | float | str: The converted number, or the original text.
    """
    try:
        value = float(string)
    except ValueError:
        return string
    if not value.is_integer():
        return value
    try:
        # Parse integer literals directly: going through float would
        # silently round integers larger than 2**53.
        return int(string)
    except (TypeError, ValueError):
        # Whole-number input that is not a plain integer literal, e.g. '1e3'.
        return int(value)
85+
86+
87+
def parse_parameters(text, algorithm):
    """Parse edited ``name = value`` text back into a parameter dictionary.

    Blank lines and lines starting with ``#`` are ignored. Each remaining
    line is split at its first ``=``. Values are interpreted as follows:

    * numeric text becomes ``int`` or ``float``;
    * comma-separated text becomes a tuple whose numeric elements are kept
      as numbers and whose other elements are resolved as function names;
    * ``true`` / ``false`` (any case) become booleans;
    * ``None`` becomes ``None``;
    * anything else is resolved as a function name.

    Args:
        text (str): The parameter text to parse.
        algorithm: Algorithm used to resolve function names.

    Returns:
        dict: Mapping of parameter name to parsed value.

    Raises:
        ValueError: If a line contains no ``=``.
        KeyError: If a function name cannot be resolved.
    """
    parameters = {}
    for raw_line in text.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue

        # Split on the first '=' only, so a value may itself contain '='.
        key, separator, raw_value = line.partition('=')
        if not separator:
            raise ValueError(f'Invalid parameter line: {line!r}')

        value = convert_string(raw_value.strip())
        if isinstance(value, str):
            parts = [part.strip() for part in value.split(',')]
            lowered = value.lower()
            if len(parts) > 1:  # tuple
                converted = (convert_string(part) for part in parts)
                # Keep numeric elements; only names are looked up as functions.
                value = tuple(
                    find_function(item, algorithm) if isinstance(item, str) else item
                    for item in converted
                )
            elif lowered in ('true', 'false'):  # boolean
                value = lowered == 'true'
            elif value == 'None':  # a None value is written out as 'None'
                value = None
            else:  # probably a function
                value = find_function(value, algorithm)
        parameters[key.strip()] = value
    return parameters
106+
107+
108+
def edit_parameters(parameters, algorithm):
    """Let the user edit algorithm parameters in a text editor.

    The parameters are written to a temporary file, the editor returned by
    ``text_editor()`` is run on it, and the saved text is parsed back. The
    temporary file is removed afterwards, whether or not editing succeeded.

    Args:
        parameters (dict): Current parameters. Modified in place: the
            ``individual_type`` and ``initialization_function`` entries are
            removed and are not offered for editing.
        algorithm: Algorithm used to resolve function names while parsing.

    Returns:
        dict: The parameters parsed from the edited file.

    Raises:
        subprocess.CalledProcessError: If the editor exits with a non-zero
            status.
    """
    import shlex  # local import: only needed to quote the temp-file path

    parameters.pop('individual_type', None)
    parameters.pop('initialization_function', None)

    fd, filename = tempfile.mkstemp()
    # Close our handle right away; the file is accessed by path from here on.
    os.close(fd)

    new_parameters = None
    try:
        path = Path(filename)
        path.write_text(parameters_string(parameters))

        # The editor setting may carry its own arguments, so it is still run
        # through the shell. The file path is quoted so that spaces or shell
        # metacharacters in the temp directory cannot break the command.
        if platform.system() == 'Windows':
            quoted_filename = f'"{filename}"'
        else:
            quoted_filename = shlex.quote(filename)
        subprocess.run(f'{text_editor()} {quoted_filename}', shell=True, check=True)

        new_parameters = parse_parameters(path.read_text(), algorithm)
    finally:
        # Best-effort cleanup: a failed delete is reported, not raised.
        try:
            os.unlink(filename)
        except OSError as e:
            print('Error:', e, file=sys.stderr)
    return new_parameters
128+
129+
130+
def main():
    """Run the ``niaarm`` command line interface.

    Loads the dataset, builds the optimization task, lets the user edit the
    chosen algorithm's parameters, runs the algorithm, and then optionally
    exports the mined rules and prints statistics about them.

    Returns:
        int: 0 on success, 1 on any failure (no arguments given, no stopping
        criterion given, invalid parameters, or an error while running).
    """
    parser = get_parser()

    # This check must run before parse_args(): with no arguments argparse
    # aborts on the missing required options, so a check placed after it is
    # never reached.
    if len(sys.argv) == 1:
        parser.print_help()
        return 1

    args = parser.parse_args()

    if args.max_evals == np.inf and args.max_iters == np.inf:
        print('--max-evals and/or --max-iters missing', file=sys.stderr)
        return 1

    try:
        dataset = Dataset(args.input_file)
        problem = NiaARM(dataset.dimension, dataset.features, dataset.transactions, args.alpha, args.beta, args.gamma,
                         args.delta, args.log)
        task = Task(problem, max_iters=args.max_iters, max_evals=args.max_evals,
                    optimization_type=OptimizationType.MAXIMIZATION)

        algorithm = get_algorithm(args.algorithm, seed=args.seed)
        params = algorithm.get_parameters()
        new_params = edit_parameters(params, algorithm.__class__)
        if new_params is None:
            print('Invalid parameters', file=sys.stderr)
            return 1

        # Reject names the user added or misspelled while editing.
        for param in new_params:
            if param not in params:
                print(f'Invalid parameter: {param}', file=sys.stderr)
                return 1

        algorithm.set_parameters(**new_params)

        algorithm.run(task)

        if args.output_file:
            problem.sort_rules()
            problem.export_rules(args.output_file)

        if args.show_stats:
            stats = Stats(problem.rules)
            report = (
                ('Total rules', 'total_rules'),
                ('Average fitness', 'avg_fitness'),
                ('Average support', 'avg_support'),
                ('Average confidence', 'avg_confidence'),
                ('Average coverage', 'avg_coverage'),
                ('Average shrinkage', 'avg_shrinkage'),
                ('Average length of antecedent', 'avg_ant_len'),
                ('Average length of consequent', 'avg_con_len'),
            )
            print('\nSTATS:')
            for label, attribute in report:
                print(f'{label}: {getattr(stats, attribute)}')

    except Exception as e:
        # Top-level boundary: report any failure and signal it through the
        # exit status instead of showing a traceback.
        print('Error:', e, file=sys.stderr)
        return 1

    return 0

niaarm/niaarm.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,24 @@ class NiaARM(Problem):
2828
2929
"""
3030

31-
def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma=0.0, delta=0.0):
31+
def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma=0.0, delta=0.0, logging=False):
3232
r"""Initialize instance of NiaARM.
3333
3434
Arguments:
3535
3636
"""
3737
self.features = features
3838
self.transactions = transactions
39+
40+
if alpha + beta + gamma + delta == 0:
41+
raise ValueError('At least one of alpha, beta, gamma or delta must be set')
42+
3943
self.alpha = alpha
4044
self.beta = beta
4145
self.gamma = gamma
4246
self.delta = delta
4347

48+
self.logging = logging
4449
self.best_fitness = np.NINF
4550
self.rules = []
4651
super().__init__(dimension, 0.0, 1.0)
@@ -116,7 +121,7 @@ def _evaluate(self, sol):
116121
self.rules.append(
117122
Rule(antecedent1, consequent1, fitness, support, confidence, coverage, shrinkage))
118123

119-
if fitness > self.best_fitness:
124+
if self.logging and fitness > self.best_fitness:
120125
self.best_fitness = fitness
121126
print(f'Fitness: {fitness}, Support: {support}, Confidence:{confidence}, Coverage:{coverage}, '
122127
f'Shrinkage:{shrinkage}')

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ pandas = [
2020
[tool.poetry.dev-dependencies]
2121
pytest = "^7.0.1"
2222

23+
[tool.poetry.scripts]
24+
niaarm = 'niaarm.cli:main'
25+
2326
[build-system]
2427
requires = ["poetry-core>=1.0.0"]
2528
build-backend = "poetry.core.masonry.api"

0 commit comments

Comments
 (0)