Skip to content

Commit 2ea75d3

Browse files
committed
add example
1 parent 2542e26 commit 2ea75d3

File tree

6 files changed

+152
-3
lines changed

6 files changed

+152
-3
lines changed

dist/rehline-0.0.1.tar.gz

13.1 KB
Binary file not shown.

rehline/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33

44
from ._loss import ReHLoss, PQLoss
55
from ._class import ReHLine, ReHLine_solver
6-
6+
from ._base import make_fair_classification

rehline/_base.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
import numpy as np
99
from scipy.special import huber
10+
from sklearn.datasets import make_classification
11+
from sklearn.preprocessing import StandardScaler
1012

1113
def relu(x):
1214
"""
@@ -49,3 +51,40 @@ def _check_rehu(rehu_coef, rehu_intercept, rehu_cut):
4951
assert rehu_coef.shape == rehu_intercept.shape, "`rehu_coef` and `rehu_intercept` should be the same shape!"
5052
if len(rehu_coef) > 0:
5153
assert (rehu_cut >= 0.0).all(), "`rehu_cut` must be non-negative!"
54+
55+
def make_fair_classification(n_samples=100, n_features=5, ind_sensitive=0, random_state=None):
    """
    Generate a random binary fair classification problem.

    The data are drawn with ``sklearn.datasets.make_classification``, the
    0/1 labels are mapped to -1/+1, and every feature column is passed
    through ``StandardScaler``.

    Parameters
    ----------
    n_samples : int, default=100
        The number of samples.

    n_features : int, default=5
        The total number of features.

    ind_sensitive : int, default=0
        The index of the sensitive feature.

    random_state : int, RandomState instance or None, default=None
        Forwarded to ``make_classification``. Pass an int to obtain the
        same dataset on every call; ``None`` keeps the previous
        (non-reproducible) behaviour.

    Returns
    -------
    X : ndarray of shape (n_samples, n_features)
        The generated samples, after column-wise scaling.

    y : ndarray of shape (n_samples,)
        The +/- labels for class membership of each sample.

    X_sen: ndarray of shape (n_samples,)
        The centered samples of the sensitive feature, i.e. column
        `ind_sensitive` of the scaled `X`.
    """

    X, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               random_state=random_state)
    # map the {0, 1} labels to {-1, +1}
    y = 2*y - 1

    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # taken after scaling, so the sensitive feature is centered as documented
    X_sen = X[:, ind_sensitive]

    return X, y, X_sen

rehline/_class.py

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,45 @@ class ReHLine(BaseEstimator):
4747
4848
n_iter_: int
4949
Maximum number of iterations run across all classes.
50+
51+
Examples
52+
--------
53+
54+
## test SVM on simulated dataset
55+
56+
>>> import numpy as np
57+
>>> from rehline import ReHLine
58+
59+
# simulate classification dataset
60+
>>> n, d, C = 1000, 3, 0.5
61+
>>> np.random.seed(1024)
62+
>>> X = np.random.randn(1000, 3)
63+
>>> beta0 = np.random.randn(3)
64+
>>> y = np.sign(X.dot(beta0) + np.random.randn(n))
65+
66+
## solution provided by ReHLine
67+
# built-in loss
68+
>>> clf = ReHLine(loss={'name': 'svm'}, C=C)
69+
>>> clf.make_ReLHLoss(X=X, y=y, loss={'name': 'svm'})
70+
>>> clf.fit(X=X)
71+
>>> print('sol provided by rehline: %s' %clf.coef_)
72+
sol provided by rehline: [ 0.74104604 -0.00622664 2.66991198]
73+
>>> print(clf.decision_function([[.1,.2,.3]]))
74+
[0.87383287]
75+
76+
# manually specify params
77+
>>> n, d = X.shape
78+
>>> U = -(C*y).reshape(1,-1)
79+
>>> L = U.shape[0]
80+
>>> V = (C*np.array(np.ones(n))).reshape(1,-1)
81+
82+
>>> clf = ReHLine(loss={'name': 'svm'}, C=C)
83+
>>> clf.U, clf.V = U, V
84+
>>> clf.fit(X=X)
85+
>>> print('sol provided by rehline: %s' %clf.coef_)
86+
sol provided by rehline: [ 0.7410154 -0.00615574 2.66990408]
87+
>>> print(clf.decision_function([[.1,.2,.3]]))
88+
[0.87384162]
5089
"""
5190

5291
def __init__(self, loss={'name':'QR', 'qt':[.25, .75]}, C=1.,
@@ -73,11 +112,15 @@ def __init__(self, loss={'name':'QR', 'qt':[.25, .75]}, C=1.,
73112
self.H = S.shape[0]
74113
self.K = A.shape[0]
75114

76-
def make_ReLHLoss(self, X, y, loss={'name':'QR', 'qt':[.25, .75]}):
115+
def make_ReLHLoss(self, X, y, loss={}):
77116
"""Generate ReLoss params based on the given training data.
78117
79118
"""
80-
self.loss.update(loss)
119+
if (loss=={}) or (loss==self.loss):
120+
pass
121+
else:
122+
print('Loss has been updated!')
123+
self.loss.update(loss)
81124

82125
n, d = X.shape
83126

tests/_test_fairsvm.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
## Test FairSVM on simulated dataset
import numpy as np
from rehline import ReHLine
from rehline import make_fair_classification

# simulate classification dataset
X, y, X_sen = make_fair_classification()
n, d = X.shape
# Cost parameter passed to ReHLine below. It was previously referenced without
# being defined (NameError); 0.5 matches the value used in the plain SVM script.
C = 0.5

## solution provided by ReHLine
# built-in hinge loss for svm
clf = ReHLine(loss={'name': 'svm'}, C=C)
clf.make_ReLHLoss(X=X, y=y, loss={'name': 'svm'})

# specify the params of FairSVM
# A has two rows: +(X_sen @ X)/n and -(X_sen @ X)/n.
# NOTE(review): presumably, together with b, these bound the mean of
# X_sen * score in absolute value -- confirm against ReHLine's constraint form.
A = np.repeat([X_sen @ X], repeats=[2], axis=0) / n
A[1] = -A[1]
# suppose the fair tolerance is 0.01
b = np.array([.01, .01])
clf.A, clf.b = A, b
clf.fit(X=X)

print('solution privided by rehline: %s' %clf.coef_)
# empirical mean of score * X_sen, reported as the fairness diagnostic
score = X@clf.coef_
cor_sen = np.mean(score * X_sen)
print('correlation btw score and X_sen is: %.3f' %cor_sen)

tests/_test_svm.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
## Test SVM on simulated dataset
import numpy as np
from sklearn.svm import LinearSVC

from rehline import ReHLine

np.random.seed(1024)
# simulate classification dataset
n, d, C = 1000, 3, 0.5
# use n and d rather than repeating the literals, so the sizes stay in sync
X = np.random.randn(n, d)
beta0 = np.random.randn(d)
y = np.sign(X.dot(beta0) + np.random.randn(n))

## solution provided by sklearn
# hinge-loss linear SVM without intercept, used as the reference solution
clf = LinearSVC(C=C, loss='hinge', fit_intercept=False,
                random_state=0, tol=1e-6, max_iter=1000000)
clf.fit(X, y)
sol = clf.coef_.flatten()

print('solution privided by liblinear: %s' %sol)

## solution provided by ReHLine
# built-in loss
clf = ReHLine(loss={'name': 'svm'}, C=C)
clf.make_ReLHLoss(X=X, y=y, loss={'name': 'svm'})
clf.fit(X=X)

print('solution privided by rehline: %s' %clf.coef_)
print(clf.decision_function([[.1,.2,.3]]))

# manually specify params
# U = -C*y and V = C, each of shape (1, n).
# NOTE(review): presumably the ReLU coefficients/intercepts that express the
# hinge loss in ReHLine's parameterisation -- confirm against the ReHLine docs.
n, d = X.shape
U = -(C*y).reshape(1,-1)
V = (C*np.array(np.ones(n))).reshape(1,-1)

clf = ReHLine(loss={'name': 'svm'}, C=C)
clf.U, clf.V = U, V
clf.fit(X=X)

print('solution privided by rehline: %s' %clf.coef_)
print(clf.decision_function([[.1,.2,.3]]))

0 commit comments

Comments
 (0)