-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathK_Lasso.py
More file actions
75 lines (65 loc) · 3.41 KB
/
K_Lasso.py
File metadata and controls
75 lines (65 loc) · 3.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import numpy as np
from sklearn.gaussian_process.kernels import RBF
from sklearn import linear_model
import warnings as wr
def calc_dist(sample_idx, data):  # RBF kernel
    """
    Compute the RBF-kernel similarity between one sample and every sample in "data".

    Every feature gets its own length scale, namely its standard deviation.
    Features whose standard deviation is 0 are dropped (with a warning), since
    a zero length scale cannot be used to rescale a feature.
    :param sample_idx: row index of the base sample inside "data"
    :param data: 2-D table of samples (rows) by features (columns) that offers a
                 ".std(axis=0)" method, e.g. a pandas DataFrame or a numpy array
    :return: row "sample_idx" of the kernel matrix, i.e. the similarity of every
             sample in "data" to the base sample
    """
    # Compute the per-feature spread exactly once, on the caller's object, so
    # the zero check and the kernel length scale always use the same estimator
    # (pandas and numpy default to different degrees of freedom for std).
    spread = np.asarray(data.std(axis=0))
    values = np.asarray(data)

    varying = spread != 0
    if not varying.all():
        wr.warn("The standard deviation of an data series is 0. This series will be ignored for distance calculation.")
        values = values[:, varying]
        spread = spread[varying]

    kernel = RBF(length_scale=spread, length_scale_bounds=(0.0, 1.0))
    # The kernel returns the full pairwise matrix; only the base sample's row is needed.
    return kernel(values)[sample_idx]
def k_lasso(interpretable_data, distance, target, k, kernel="RBF", sample_idx=-1, limit=1e-3, method='lasso_path'):
    """
    Fit a sample-weighted linear model and return one weight per feature,
    regularised so that (about) k features keep a relevant weight.

    :param interpretable_data: samples (rows) with different feature variations (columns)
    :param distance: data handed to the kernel to derive the sample weights or,
                     if sample_idx is -1, the sample weights themselves
    :param target: parameter that should be explained / target of the regression,
                   one value per sample
    :param k: number of feature weights that should stay relevant; the
              regularisation strength is adjusted accordingly
    :param kernel: "RBF" to use calc_dist, otherwise a kernel object that is called
                   as kernel(distance) and indexed with sample_idx (expected to
                   work like the kernels from sklearn)
    :param sample_idx: index of the base sample. If -1 the kernel will not be
                       applied, but "distance" will be used as weights directly
    :param limit: absolute weight below which a feature counts as irrelevant
                  (only used by the "ridge" method; "lasso_path" counts
                  non-zero weights)
    :param method: method for the regression, either "lasso_path" or "ridge"
    :return: array with the weight of every feature. For "lasso_path" this is the
             first step of the path with exactly k non-zero weights; if there is
             none, a warning is issued and the last step with at most k non-zero
             weights is returned. For "ridge" it is the first fit with at most k
             weights >= limit, or the last fit tried.
    :raises ValueError: if "method" is neither "lasso_path" nor "ridge"
    """
    # Fail fast instead of silently returning None for a misspelled method.
    if method not in ('lasso_path', 'ridge'):
        raise ValueError("Unknown method %r; expected 'lasso_path' or 'ridge'." % (method,))

    # get weights
    if sample_idx == -1:
        # External weights
        dist = distance
    elif isinstance(kernel, str) and kernel == "RBF":
        # the sample is needed since the kernel calculates the distance between all points
        # and we are only interested in the distance to the main sample
        dist = calc_dist(sample_idx, distance)
    else:
        # External kernel. Expected to work like the kernels from sklearn.
        dist = kernel(distance)[sample_idx]

    if method == 'lasso_path':
        # regression using the lasso method (absolute penalty term)
        # Work on plain arrays so pandas and numpy inputs behave the same.
        weights = np.asarray(dist, dtype=float)
        features = np.asarray(interpretable_data, dtype=float)
        labels = np.asarray(target, dtype=float)

        # Centre on the weighted mean and scale by sqrt(weight): an unweighted
        # fit on the transformed data equals a weighted fit on the original data.
        root_weights = np.sqrt(weights)
        weighted_data = ((features - np.average(features, axis=0, weights=weights))
                         * root_weights[:, np.newaxis])
        weighted_labels = ((labels - np.average(labels, axis=0, weights=weights))
                           * root_weights)

        _, _, coefs = linear_model.lars_path(weighted_data,
                                             weighted_labels,
                                             method='lasso',
                                             verbose=False)

        # coefs has one column per step of the path; count non-zero weights per step.
        active = (np.abs(coefs) > 0).sum(axis=0)
        exact = np.flatnonzero(active == k)
        if exact.size:
            return coefs[:, exact[0]]

        # No step matches k exactly (e.g. k exceeds the number of usable
        # features): return the closest admissible step rather than None.
        within_budget = np.flatnonzero(active <= k)
        step = within_budget[-1] if within_budget.size else 0
        wr.warn("No step of the lasso path has exactly %s non-zero weights; "
                "returning a step with %s non-zero weights instead." % (k, active[step]))
        return coefs[:, step]

    # regression using the ridge regression (quadratic penalty term)
    relevant_results = k + 1
    ld = 1
    r = []
    while relevant_results > k and ld < 200:
        # update hyperparameter: the penalty grows exponentially with every
        # iteration, shrinking more weights below "limit"
        a = 0.1 * np.exp(0.1 * ld) - 0.1
        clf = linear_model.Ridge(alpha=a)
        clf.fit(interpretable_data, target, sample_weight=dist)
        r = clf.coef_
        # number of features whose absolute weight is still at least "limit"
        relevant_results = int(np.count_nonzero(np.abs(r) >= limit))
        ld += 1
    return r