-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathevaluate.py
More file actions
110 lines (95 loc) · 4.21 KB
/
evaluate.py
File metadata and controls
110 lines (95 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import argparse
import os
import torch
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import SGDClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import dataset
import functionals as F
import utils
def svm(train_features, train_labels, test_features, test_labels):
    """Fit a linear SVM on the training split and score it on both splits.

    A ``LinearSVC`` with a fixed ``random_state`` of 10 is trained on
    ``(train_features, train_labels)``. The test score is printed.

    Returns:
        tuple: ``(acc_train, acc_test)`` as reported by ``LinearSVC.score``.
    """
    classifier = LinearSVC(verbose=0, random_state=10)
    classifier.fit(train_features, train_labels)

    # Score the training split first, then the held-out split.
    scores = {
        "train": classifier.score(train_features, train_labels),
        "test": classifier.score(test_features, test_labels),
    }
    print(f"SVM: {scores['test']}")
    return scores["train"], scores["test"]
def knn(train_features, train_labels, test_features, test_labels, k=5):
    """Classify test samples by majority vote among their k most similar
    training samples.

    Similarity is the inner product between feature rows.
    NOTE(review): this equals cosine similarity only if the feature rows are
    unit-normalized -- confirm against the caller.

    Options:
        k (int): number of nearest training samples that vote

    Returns:
        Accuracy of the voted predictions against ``test_labels``; the value
        is also printed.
    """
    # Rows index training samples, columns index test samples.
    similarity = torch.from_numpy(train_features @ test_features.T)

    # For every test column keep the k training rows with the largest score.
    _, neighbour_idx = similarity.topk(k=k, dim=0)
    neighbour_labels = torch.tensor(train_labels[neighbour_idx])

    # The most frequent label among the k neighbours is the prediction.
    voted, _ = neighbour_labels.mode(0)
    acc = compute_accuracy(voted.detach().numpy(), test_labels)
    print(f"kNN: {acc}")
    return acc
def nearsub(train_features, train_labels, test_features, test_labels, n_comp=10):
    """Nearest-subspace classification with one truncated-SVD basis per class.

    Each test sample is assigned to the class whose subspace leaves the
    smallest residual after the sample is projected onto it. The resulting
    accuracy is printed and returned.

    Options:
        n_comp (int): number of SVD components per class; lowered to
            ``feature_dim - 1`` when it is not smaller than the feature
            dimension
    """
    classes = np.unique(test_labels)
    # presumably returns one 2-D feature array per class, ordered like
    # `classes` -- verify against utils.sort_dataset
    per_class, _ = utils.sort_dataset(train_features, train_labels,
                                      classes=classes, stack=False)
    fd = per_class[0].shape[1]
    n_comp = fd - 1 if n_comp >= fd else n_comp
    identity = np.eye(fd)

    def _residual_norm(class_features):
        # Columns of `basis` span the fitted subspace; (I - B B^T) removes
        # the part of each test sample that lies inside it.
        basis = TruncatedSVD(n_components=n_comp).fit(class_features).components_.T
        residual = (identity - basis @ basis.T) @ test_features.T
        return np.linalg.norm(residual, ord=2, axis=0)

    residual_norms = [_residual_norm(per_class[j]) for j in range(len(classes))]

    # Smallest residual wins; map the winning index back to a class label.
    winner = np.argmin(residual_norms, axis=0)
    acc_svd = compute_accuracy(classes[winner], test_labels)
    print(f"SVD: {acc_svd}")
    return acc_svd
def nearsub_pca(train_features, train_labels, test_features, test_labels, n_comp=10):
    """Perform nearest subspace classification with per-class PCA subspaces.

    For every class a PCA basis is fitted on that class's training features.
    Each test sample is centered by the class mean, and the norm of the part
    lying outside the class subspace is its score for that class. The class
    with the smallest score is the prediction.

    Options:
        n_comp (int): number of PCA components per class; lowered to
            ``feature_dim - 1`` when it is not smaller than the feature
            dimension

    Returns:
        Accuracy of the predictions against ``test_labels`` (also printed).
    """
    scores_pca = []
    classes = np.unique(test_labels)
    # presumably returns one 2-D feature array per class, ordered like
    # `classes` -- verify against utils.sort_dataset
    features_sort, _ = utils.sort_dataset(train_features, train_labels,
                                          classes=classes, stack=False)
    fd = features_sort[0].shape[1]
    if n_comp >= fd:
        n_comp = fd - 1
    identity = np.eye(fd)
    for j in range(len(classes)):
        pca = PCA(n_components=n_comp).fit(features_sort[j])
        pca_subspace = pca.components_.T
        # Center the test samples with the class mean before projecting.
        mean = np.mean(features_sort[j], axis=0)
        pca_j = (identity - pca_subspace @ pca_subspace.T) \
            @ (test_features - mean).T
        score_pca_j = np.linalg.norm(pca_j, ord=2, axis=0)
        scores_pca.append(score_pca_j)
    test_predict_pca = np.argmin(scores_pca, axis=0)
    acc_pca = compute_accuracy(classes[test_predict_pca], test_labels)
    print('PCA: {}'.format(acc_pca))
    # Return the PCA accuracy; the previous `acc_svd` is not defined in this
    # function and raised NameError on every call.
    return acc_pca
def compute_accuracy(y_pred, y_true):
    """Compute accuracy as the fraction of predictions equal to the labels.

    Labels are compared element-wise with ``!=`` instead of being subtracted,
    so boolean and string label arrays are handled as well as numeric ones
    (NumPy does not support ``-`` on those dtypes).

    Args:
        y_pred: NumPy array of predicted labels.
        y_true: NumPy array of true labels, same shape as ``y_pred``.

    Returns:
        float: ``1 - mismatches / y_true.size``.

    Raises:
        AssertionError: if the two arrays do not have the same shape.

    Empty inputs are not supported (division by zero).
    """
    assert y_pred.shape == y_true.shape, \
        f"shape mismatch: {y_pred.shape} vs {y_true.shape}"
    n_wrong = np.count_nonzero(y_pred != y_true)
    return 1 - n_wrong / y_true.size
def _sgd_log_loss_name():
    """Return the logistic-loss name accepted by the installed scikit-learn."""
    import re
    import sklearn

    # 'log' was renamed to 'log_loss' in scikit-learn 1.1 and the old
    # spelling was later removed; older releases only know 'log'.
    match = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
    if match and (int(match.group(1)), int(match.group(2))) >= (1, 1):
        return 'log_loss'
    # Unparsable or older version: keep the original spelling.
    return 'log'


def baseline(train_features, train_labels, test_features, test_labels):
    """Fit several off-the-shelf classifiers and print their test scores.

    Each model is trained on ``(train_features, train_labels)`` and scored
    on ``(test_features, test_labels)``. One line ``"<name>: <score>"`` is
    printed per model. Nothing is returned.
    """
    test_models = {
        'log_l2': SGDClassifier(loss=_sgd_log_loss_name(), max_iter=10000,
                                random_state=42),
        'SVM_linear': LinearSVC(max_iter=10000, random_state=42),
        'SVM_RBF': SVC(kernel='rbf', random_state=42),
        'DecisionTree': DecisionTreeClassifier(),
        'RandomForrest': RandomForestClassifier(),
    }
    for model_name, test_model in test_models.items():
        test_model.fit(train_features, train_labels)
        score = test_model.score(test_features, test_labels)
        print(f"{model_name}: {score}")