Skip to content

Commit 5e9b536

Browse files
Created script for experimentation #140
1 parent fc15dac commit 5e9b536

File tree

3 files changed

+276
-0
lines changed

3 files changed

+276
-0
lines changed

experimentation/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/usr/bin/env python
2+
# -*- coding:utf-8 -*-
3+
# @Filename: __init__.py
4+
# @Author: Daniel Puente Ramírez
5+
# @Time: 24/3/22 10:11

experimentation/general.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
#!/usr/bin/env python
2+
# -*- coding:utf-8 -*-
3+
# @Filename: general.py
4+
# @Author: Daniel Puente Ramírez
5+
# @Time: 24/3/22 10:11
6+
7+
import csv
8+
import logging
9+
import os
10+
import sys
11+
import time
12+
from math import floor
13+
from os import walk
14+
15+
current = os.path.dirname(os.path.realpath(__file__))
16+
parent = os.path.dirname(current)
17+
sys.path.append(parent)
18+
19+
import numpy as np
20+
import pandas as pd
21+
import yagmail
22+
from sklearn.metrics import mean_squared_error, f1_score, accuracy_score
23+
from sklearn.model_selection import StratifiedKFold
24+
from sklearn.naive_bayes import GaussianNB
25+
from sklearn.neighbors import KNeighborsClassifier
26+
from sklearn.tree import DecisionTreeClassifier
27+
28+
from instance_selection import ENN, LSSm
29+
from semisupervised import DensityPeaks
30+
31+
# Run-wide configuration. The same timestamp is embedded in both the results
# file name and the log file name so the two outputs of one run can be paired.
time_str = time.strftime("%Y%m%d-%H%M%S")
k = 3  # n_neighbors passed to KNeighborsClassifier in main()
folds = 10  # n_splits of the StratifiedKFold built in main()
precision = 0.05  # share of each training fold that keeps its label
file_name = 'experiments'
csv_results = os.path.join('.', 'results', file_name + '_' + time_str + '.csv')
log_file = os.path.join('.', 'logs', '_'.join([file_name, time_str]) + '.log')

# logging.FileHandler opens its target as soon as it is constructed and
# raises FileNotFoundError if the parent directory is missing, so make sure
# the directory exists before configuring logging.
os.makedirs(os.path.dirname(log_file), exist_ok=True)

# Every record goes both to the per-run log file and to stdout.
logging.basicConfig(level=logging.DEBUG,
                    format=' %(asctime)s :: %(levelname)-8s :: %(message)s',
                    handlers=[logging.FileHandler(log_file),
                              logging.StreamHandler(sys.stdout)]
                    )
44+
45+
46+
def search_datasets(folder):
    """Load every file located directly inside *folder* and reset the results CSV.

    As a side effect the results file named by the module-level
    ``csv_results`` is (re)created and its header row is written, so that
    later code can append one row per evaluated fold.

    :param folder: directory to scan. Only files at its top level are
        considered; each one is read with ``pd.read_csv(..., header=None)``.
        A missing folder is logged as an error and yields an empty result.
    :return: dict mapping each file name to its DataFrame, in sorted
        file-name order.
    """
    if os.path.isdir(folder):
        logging.info(f'Looking up for datasets in {folder}')
    else:
        logging.error(f'{folder} does not exist')

    # Only the first tuple yielded by walk() is consumed (no recursion). The
    # default covers a missing folder, for which walk() yields nothing.
    datasets_found = sorted(next(walk(folder), (None, None, []))[2])
    logging.info(f'Founded {len(datasets_found)} - {datasets_found}')

    header = [
        'dataset',
        'percent labeled',
        'fold',
        'base',
        'Filter',
        'f1-score',
        'mean squared error',
        'accuracy score',
    ]

    # open() does not create missing parent directories.
    results_dir = os.path.dirname(csv_results)
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)

    # newline='' is what the csv module requires of files it writes to;
    # without it an extra blank line follows each row on Windows.
    with open(csv_results, 'w', newline='') as save:
        csv.writer(save).writerow(header)

    datasets = {
        dataset: pd.read_csv(os.path.join(folder, dataset), header=None)
        for dataset in datasets_found
    }
    logging.debug('Datasets ready to be used')

    return datasets
79+
80+
81+
def main(datasets):
    """Cross-validate DensityPeaks.STDPNF on every dataset and record the scores.

    For each dataset, each base classifier and each filter, a stratified
    ``folds``-fold split is evaluated. Inside a fold, all but a ``precision``
    share of the training labels are overwritten with -1 before the model is
    fitted; the weighted f1, mean squared error and accuracy on the test part
    are then appended as one row to the file named by ``csv_results``. A fold
    that raises is logged with its traceback and recorded with empty metric
    cells, so one failure does not abort the whole run.

    :param datasets: mapping of dataset name to a DataFrame whose last column
        holds the class label and whose remaining columns are the features.
    """
    logging.info('Starting main...')
    random_state = 0x24032022
    # The folds and the classifiers are seeded, so seed the labelled /
    # unlabelled draw as well; otherwise two runs give different results.
    rng = np.random.default_rng(random_state)
    skf = StratifiedKFold(n_splits=folds, shuffle=True,
                          random_state=random_state)
    classifiers = [KNeighborsClassifier, DecisionTreeClassifier, GaussianNB]
    classifiers_params = [
        {'n_neighbors': k, 'n_jobs': -1}, {'random_state': random_state}, {}
    ]
    # 'ENANE' is forwarded to STDPNF as a plain string rather than a class.
    filters = [ENN, LSSm, 'ENANE']

    for dataset, values in datasets.items():
        logging.info(f'\n\nCurrent dataset: {dataset} - Shape: '
                     f'{values.shape}')
        # These depend only on the dataset, so build them once per dataset
        # instead of once per classifier/filter combination.
        samples = values.iloc[:, :-1]
        y = values.iloc[:, -1]
        y_df = pd.DataFrame(y.tolist())

        for classifier, classifier_params in zip(classifiers,
                                                 classifiers_params):
            classifier_name = classifier.__name__
            for filter_method in filters:
                filter_name = filter_method if isinstance(
                    filter_method, str) else filter_method.__name__
                for fold, (train_index, test_index) in enumerate(
                        skf.split(samples, y)):
                    t_start = time.time()
                    logging.info(f'Dataset: {dataset} -- Classifier: '
                                 f'{classifier_name} -- Filter: {filter_name} '
                                 f'-- Fold: {fold}')
                    x_train = samples.iloc[train_index, :].copy(deep=True)
                    x_test = samples.iloc[test_index, :].copy(deep=True)
                    y_train = y_df.iloc[train_index, :].copy(deep=True)
                    y_test = y_df.iloc[test_index, :].copy(deep=True)

                    unlabeled_indexes = rng.choice(
                        train_index, floor(len(x_train) * (1 - precision)),
                        replace=False)

                    # y_df has a default RangeIndex, so the positional values
                    # in train_index are also the row labels of y_train.
                    # .loc is the accessor for assigning to many rows; .at is
                    # meant for a single scalar cell.
                    y_train.loc[unlabeled_indexes, :] = -1

                    model = DensityPeaks.STDPNF(
                        classifier=classifier,
                        classifier_params=classifier_params,
                        filtering=True,
                        filter_method=filter_method
                    )
                    try:
                        model.fit(x_train, y_train)
                        y_pred = model.predict(x_test)
                        f1 = f1_score(y_true=y_test, y_pred=y_pred,
                                      average="weighted")
                        mse = mean_squared_error(y_true=y_test,
                                                 y_pred=y_pred)
                        acc = accuracy_score(y_true=y_test, y_pred=y_pred)

                        logging.info(f'\tf1: {f1:.2f} -- mse: {mse:.2f} -- acc:'
                                     f' {acc:.2f}')
                    except Exception:
                        # Keep the run going: the row is still written, with
                        # empty metric cells marking the failed fold.
                        f1 = mse = acc = ''
                        logging.exception('Failed')
                    t_end = time.time()
                    logging.info(
                        f'\t\tElapsed: {(t_end - t_start) / 60:.2f} minutes')
                    # newline='' is required by the csv module for files it
                    # writes; it avoids blank rows on Windows.
                    with open(csv_results, 'a', newline='') as save:
                        w = csv.writer(save)
                        w.writerow([dataset, precision, fold, classifier_name,
                                    filter_name, f1, mse, acc])
147+
148+
149+
if __name__ == '__main__':
    # Script entry point: run the whole experiment, then e-mail either the
    # results plus log (success) or the log alone (failure).
    # Usage: general.py <sender-mail> <sender-password>
    if len(sys.argv) < 3:
        sys.exit(f'usage: {sys.argv[0]} <mail> <password>')
    mail = sys.argv[1]
    passwd = sys.argv[2]
    yag = yagmail.SMTP(user=mail, password=passwd)
    # NOTE(review): the recipient below looks like a redaction placeholder
    # in this copy of the file -- confirm the real address.
    recipient = '[email protected]'
    t_start_g = time.time()
    try:
        logging.info('--- Starting ---')
        datasets_folder = os.path.join('..', 'datasets', 'UCI-Experimentation')
        datasets_dfs = search_datasets(datasets_folder)

        main(datasets_dfs)

        logging.info('--- Process completed ---')
        attach = [csv_results, log_file]
        t_end_g = time.time()
        logging.info(f'Elapsed: {(t_end_g - t_start_g) / 60:.2f} minutes')
        yag.send(to=recipient, subject='self_training_validation '
                                       'COMPLETED',
                 contents='self_training_validation has been completed.\n'
                          f'Elapsed: {(t_end_g - t_start_g) / 60:.2f} minutes',
                 attachments=attach)
    except Exception:
        t_end_g = time.time()
        # Log the traceback BEFORE sending: the mail tells the reader to check
        # the attached log, so the log must already contain the failure.
        logging.exception('--- Process has broken ---')
        logging.info(f'Elapsed: {(t_end_g - t_start_g) / 60:.2f} minutes')
        content = 'FATAL ERROR - Check the attached log\n' \
                  f'Elapsed: {(t_end_g - t_start_g) / 60:.2f} minutes'

        yag.send(to=recipient, subject='self_training_validation '
                                       'ERROR',
                 contents=content, attachments=[log_file])
    logging.info("Email sent successfully")
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
dataset,percent labeled,fold,base,Filter,f1-score,mean squared error,accuracy score
2+
BreastTissue.csv,0.05,0,KNeighborsClassifier,ENN,0.12396694214876032,3.090909090909091,0.18181818181818182
3+
BreastTissue.csv,0.05,1,KNeighborsClassifier,ENN,0.055944055944055944,8.545454545454545,0.18181818181818182
4+
BreastTissue.csv,0.05,2,KNeighborsClassifier,ENN,0.3070707070707071,2.6363636363636362,0.36363636363636365
5+
BreastTissue.csv,0.05,3,KNeighborsClassifier,ENN,0.20202020202020202,5.181818181818182,0.36363636363636365
6+
BreastTissue.csv,0.05,4,KNeighborsClassifier,ENN,0.05194805194805196,2.1818181818181817,0.09090909090909091
7+
BreastTissue.csv,0.05,5,KNeighborsClassifier,ENN,0.31457431457431456,4.181818181818182,0.45454545454545453
8+
BreastTissue.csv,0.05,6,KNeighborsClassifier,ENN,0.20606060606060606,9.6,0.3
9+
BreastTissue.csv,0.05,7,KNeighborsClassifier,ENN,0.06666666666666668,9.7,0.2
10+
BreastTissue.csv,0.05,8,KNeighborsClassifier,ENN,0.16,3.3,0.2
11+
BreastTissue.csv,0.05,9,KNeighborsClassifier,ENN,0.27999999999999997,5.5,0.4
12+
BreastTissue.csv,0.05,0,KNeighborsClassifier,LSSm,0.35497835497835495,3.4545454545454546,0.45454545454545453
13+
BreastTissue.csv,0.05,1,KNeighborsClassifier,LSSm,0.3428571428571429,3.1818181818181817,0.45454545454545453
14+
BreastTissue.csv,0.05,2,KNeighborsClassifier,LSSm,0.055944055944055944,4.818181818181818,0.18181818181818182
15+
BreastTissue.csv,0.05,3,KNeighborsClassifier,LSSm,0.1212121212121212,3.090909090909091,0.18181818181818182
16+
BreastTissue.csv,0.05,4,KNeighborsClassifier,LSSm,0.12121212121212123,4.636363636363637,0.2727272727272727
17+
BreastTissue.csv,0.05,5,KNeighborsClassifier,LSSm,0.31457431457431456,4.181818181818182,0.45454545454545453
18+
BreastTissue.csv,0.05,6,KNeighborsClassifier,LSSm,0.06666666666666668,9.7,0.2
19+
BreastTissue.csv,0.05,7,KNeighborsClassifier,LSSm,0.0,3.2,0.0
20+
BreastTissue.csv,0.05,8,KNeighborsClassifier,LSSm,0.06666666666666668,9.7,0.2
21+
BreastTissue.csv,0.05,9,KNeighborsClassifier,LSSm,0.22222222222222224,2.9,0.3
22+
BreastTissue.csv,0.05,0,KNeighborsClassifier,ENANE,0.0,2.727272727272727,0.0
23+
BreastTissue.csv,0.05,1,KNeighborsClassifier,ENANE,0.21818181818181823,4.909090909090909,0.36363636363636365
24+
BreastTissue.csv,0.05,2,KNeighborsClassifier,ENANE,0.21818181818181823,4.909090909090909,0.36363636363636365
25+
BreastTissue.csv,0.05,3,KNeighborsClassifier,ENANE,0.2626262626262626,1.7272727272727273,0.36363636363636365
26+
BreastTissue.csv,0.05,4,KNeighborsClassifier,ENANE,0.0,2.272727272727273,0.0
27+
BreastTissue.csv,0.05,5,KNeighborsClassifier,ENANE,0.23376623376623373,2.272727272727273,0.2727272727272727
28+
BreastTissue.csv,0.05,6,KNeighborsClassifier,ENANE,0.2,3.5,0.2
29+
BreastTissue.csv,0.05,7,KNeighborsClassifier,ENANE,0.185,2.2,0.3
30+
BreastTissue.csv,0.05,8,KNeighborsClassifier,ENANE,0.24888888888888888,5.8,0.4
31+
BreastTissue.csv,0.05,9,KNeighborsClassifier,ENANE,0.27999999999999997,5.5,0.4
32+
BreastTissue.csv,0.05,0,DecisionTreeClassifier,ENN,0.0,4.636363636363637,0.0
33+
BreastTissue.csv,0.05,1,DecisionTreeClassifier,ENN,0.1212121212121212,4.090909090909091,0.09090909090909091
34+
BreastTissue.csv,0.05,2,DecisionTreeClassifier,ENN,0.18181818181818182,6.090909090909091,0.2727272727272727
35+
BreastTissue.csv,0.05,3,DecisionTreeClassifier,ENN,0.3151515151515152,2.909090909090909,0.36363636363636365
36+
BreastTissue.csv,0.05,4,DecisionTreeClassifier,ENN,0.25757575757575757,2.272727272727273,0.2727272727272727
37+
BreastTissue.csv,0.05,5,DecisionTreeClassifier,ENN,0.2727272727272727,1.7272727272727273,0.36363636363636365
38+
BreastTissue.csv,0.05,6,DecisionTreeClassifier,ENN,0.13333333333333333,5.0,0.2
39+
BreastTissue.csv,0.05,7,DecisionTreeClassifier,ENN,0.3,3.1,0.4
40+
BreastTissue.csv,0.05,8,DecisionTreeClassifier,ENN,0.2333333333333333,3.3,0.3
41+
BreastTissue.csv,0.05,9,DecisionTreeClassifier,ENN,0.21000000000000002,4.9,0.3
42+
BreastTissue.csv,0.05,0,DecisionTreeClassifier,LSSm,0.32727272727272727,3.8181818181818183,0.45454545454545453
43+
BreastTissue.csv,0.05,1,DecisionTreeClassifier,LSSm,0.17316017316017315,3.909090909090909,0.18181818181818182
44+
BreastTissue.csv,0.05,2,DecisionTreeClassifier,LSSm,0.38181818181818183,2.727272727272727,0.45454545454545453
45+
BreastTissue.csv,0.05,3,DecisionTreeClassifier,LSSm,0.19090909090909092,3.909090909090909,0.2727272727272727
46+
BreastTissue.csv,0.05,4,DecisionTreeClassifier,LSSm,0.15584415584415587,4.181818181818182,0.18181818181818182
47+
BreastTissue.csv,0.05,5,DecisionTreeClassifier,LSSm,0.1515151515151515,3.5454545454545454,0.18181818181818182
48+
BreastTissue.csv,0.05,6,DecisionTreeClassifier,LSSm,0.17333333333333334,4.1,0.3
49+
BreastTissue.csv,0.05,7,DecisionTreeClassifier,LSSm,0.2,4.9,0.2
50+
BreastTissue.csv,0.05,8,DecisionTreeClassifier,LSSm,0.1619047619047619,2.2,0.2
51+
BreastTissue.csv,0.05,9,DecisionTreeClassifier,LSSm,0.0,5.7,0.0
52+
BreastTissue.csv,0.05,0,DecisionTreeClassifier,ENANE,0.0,3.909090909090909,0.0
53+
BreastTissue.csv,0.05,1,DecisionTreeClassifier,ENANE,0.09090909090909091,4.636363636363637,0.09090909090909091
54+
BreastTissue.csv,0.05,2,DecisionTreeClassifier,ENANE,0.04040404040404041,4.363636363636363,0.09090909090909091
55+
BreastTissue.csv,0.05,3,DecisionTreeClassifier,ENANE,0.21818181818181823,2.3636363636363638,0.36363636363636365
56+
BreastTissue.csv,0.05,4,DecisionTreeClassifier,ENANE,0.13636363636363635,4.454545454545454,0.18181818181818182
57+
BreastTissue.csv,0.05,5,DecisionTreeClassifier,ENANE,0.2564935064935065,4.0,0.36363636363636365
58+
BreastTissue.csv,0.05,6,DecisionTreeClassifier,ENANE,0.06666666666666667,6.0,0.1
59+
BreastTissue.csv,0.05,7,DecisionTreeClassifier,ENANE,0.29333333333333333,3.5,0.3
60+
BreastTissue.csv,0.05,8,DecisionTreeClassifier,ENANE,0.0,7.5,0.0
61+
BreastTissue.csv,0.05,9,DecisionTreeClassifier,ENANE,0.27999999999999997,5.5,0.4
62+
BreastTissue.csv,0.05,0,GaussianNB,ENN,0.015151515151515154,3.727272727272727,0.09090909090909091
63+
BreastTissue.csv,0.05,1,GaussianNB,ENN,0.055944055944055944,8.545454545454545,0.18181818181818182
64+
BreastTissue.csv,0.05,2,GaussianNB,ENN,0.055944055944055944,8.545454545454545,0.18181818181818182
65+
BreastTissue.csv,0.05,3,GaussianNB,ENN,0.055944055944055944,8.545454545454545,0.18181818181818182
66+
BreastTissue.csv,0.05,4,GaussianNB,ENN,0.11688311688311687,7.363636363636363,0.2727272727272727
67+
BreastTissue.csv,0.05,5,GaussianNB,ENN,0.055944055944055944,3.8181818181818183,0.18181818181818182
68+
BreastTissue.csv,0.05,6,GaussianNB,ENN,0.06666666666666668,9.7,0.2
69+
BreastTissue.csv,0.05,7,GaussianNB,ENN,0.06666666666666668,9.7,0.2
70+
BreastTissue.csv,0.05,8,GaussianNB,ENN,0.01818181818181818,3.7,0.1
71+
BreastTissue.csv,0.05,9,GaussianNB,ENN,0.06666666666666668,5.7,0.2
72+
BreastTissue.csv,0.05,0,GaussianNB,LSSm,0.055944055944055944,3.090909090909091,0.18181818181818182
73+
BreastTissue.csv,0.05,1,GaussianNB,LSSm,0.055944055944055944,3.3636363636363638,0.18181818181818182
74+
BreastTissue.csv,0.05,2,GaussianNB,LSSm,0.055944055944055944,8.545454545454545,0.18181818181818182
75+
BreastTissue.csv,0.05,3,GaussianNB,LSSm,0.1212121212121212,3.090909090909091,0.18181818181818182
76+
BreastTissue.csv,0.05,4,GaussianNB,LSSm,0.2337662337662338,4.7272727272727275,0.36363636363636365
77+
BreastTissue.csv,0.05,5,GaussianNB,LSSm,0.31457431457431456,4.181818181818182,0.45454545454545453
78+
BreastTissue.csv,0.05,6,GaussianNB,LSSm,0.06666666666666668,9.7,0.2
79+
BreastTissue.csv,0.05,7,GaussianNB,LSSm,0.01818181818181818,3.7,0.1
80+
BreastTissue.csv,0.05,8,GaussianNB,LSSm,0.019999999999999997,3.7,0.1
81+
BreastTissue.csv,0.05,9,GaussianNB,LSSm,0.06666666666666668,9.7,0.2
82+
BreastTissue.csv,0.05,0,GaussianNB,ENANE,0.055944055944055944,3.090909090909091,0.18181818181818182
83+
BreastTissue.csv,0.05,1,GaussianNB,ENANE,0.055944055944055944,3.090909090909091,0.18181818181818182
84+
BreastTissue.csv,0.05,2,GaussianNB,ENANE,0.055944055944055944,4.818181818181818,0.18181818181818182
85+
BreastTissue.csv,0.05,3,GaussianNB,ENANE,0.055944055944055944,3.090909090909091,0.18181818181818182
86+
BreastTissue.csv,0.05,4,GaussianNB,ENANE,0.11688311688311687,7.363636363636363,0.2727272727272727
87+
BreastTissue.csv,0.05,5,GaussianNB,ENANE,0.015151515151515154,3.0,0.09090909090909091
88+
BreastTissue.csv,0.05,6,GaussianNB,ENANE,0.01818181818181818,3.7,0.1
89+
BreastTissue.csv,0.05,7,GaussianNB,ENANE,0.24888888888888888,5.8,0.4
90+
BreastTissue.csv,0.05,8,GaussianNB,ENANE,0.06666666666666668,9.7,0.2
91+
BreastTissue.csv,0.05,9,GaussianNB,ENANE,0.06666666666666668,5.7,0.2

0 commit comments

Comments
 (0)