Skip to content

Commit 41beb4c

Browse files
add package and description
1 parent d0cbc4b commit 41beb4c

File tree

8 files changed

+378
-1
lines changed

8 files changed

+378
-1
lines changed

README.md

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,25 @@
11
# robust_linear_regression
2-
Python implementation of robust linear regression in the multiple regression and multivariate regression cases using the minimum covariance determinant
2+
3+
Python implementation of robust linear regression in the multiple regression and multivariate regression cases using the minimum covariance determinant (MCD).
4+
5+
Usage ([examples/example.ipynb](examples/example.ipynb)):
6+
```python
7+
>>> from robust_linear_regression import RobustLinearRegression
8+
>>> rlr = RobustLinearRegression()
9+
>>> x = data[:, 0, None] # n x d_x, must be 2D
10+
>>> y = data[:, 1:] # n x d_y, must be 2D
11+
>>> rlr.fit(x, y)
12+
>>> residuals = y - rlr.predict(x)
13+
>>> slope = rlr.beta
14+
>>> intercept = rlr.alpha
15+
```
16+
17+
Installation:
18+
- Development
19+
```bash
20+
$ git clone https://github.com/stevenstetzler/robust_linear_regression.git
21+
$ python -m pip install -e robust_linear_regression
22+
```
23+
24+
References:
25+
- Peter J Rousseeuw, Stefan Van Aelst, Katrien Van Driessen & José Agulló (2004) Robust Multivariate Regression, Technometrics, 46:3, 293-305, DOI: [10.1198/004017004000000329](https://doi.org/10.1198/004017004000000329)

examples/data.txt

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
0.000000000000000000e+00 2.811252029175648204e-02 2.409757503915965771e-02
2+
1.723443157970905304e-03 2.781008310006427564e-02 2.387365009068620481e-02
3+
3.447782248258590698e-03 2.566874328169888031e-02 2.278013047796978441e-02
4+
6.905692163854837418e-03 2.700268409301997963e-02 2.329597428319285513e-02
5+
6.905692163854837418e-03 2.606713263418214410e-02 2.086260406163376047e-02
6+
8.613625541329383850e-03 2.929881722764093865e-02 2.298830366076032306e-02
7+
8.613625541329383850e-03 2.672881113022640420e-02 2.306193055831684546e-02
8+
1.033442467451095581e-02 2.532311207653492602e-02 2.142076709433737847e-02
9+
1.033442467451095581e-02 2.642364849214118294e-02 2.285091585040621709e-02
10+
1.204316550865769386e-02 2.834184547543827648e-02 2.427568316796691050e-02
11+
1.204316550865769386e-02 2.617416759841262319e-02 2.263877915604872726e-02
12+
1.548836799338459969e-02 2.562233436236738271e-02 2.223904253827058142e-02
13+
1.721155177801847458e-02 2.533280779732649535e-02 2.203748313554942939e-02
14+
1.721155177801847458e-02 2.560716041119803776e-02 2.473545914953678704e-02
15+
1.893162541091442108e-02 2.507431996997411261e-02 2.178058581746089573e-02
16+
2.072165347635746002e-02 2.479179134934383910e-02 2.155318686710128162e-02
17+
2.245335234329104424e-02 2.297402474175669340e-02 1.938018252343187697e-02
18+
2.245335234329104424e-02 2.543828018650629019e-02 2.247394836648464178e-02
19+
2.245335234329104424e-02 2.485894152800938173e-02 1.888604546147121255e-02
20+
2.245335234329104424e-02 2.453890133330105527e-02 2.136386166287973509e-02
21+
2.417073305696249008e-02 2.423199678889886854e-02 2.117969406893394080e-02
22+
2.588967792689800262e-02 2.539746611580540048e-02 2.016629233573841162e-02
23+
2.588967792689800262e-02 2.402614672263325701e-02 2.105110523858844118e-02
24+
2.588967792689800262e-02 2.251944096616398383e-02 2.186327447723002138e-02
25+
2.761984197422862053e-02 2.372788702081152223e-02 2.072334738860082126e-02
26+
2.934610517695546150e-02 2.341390395446296679e-02 2.051633184610679450e-02
27+
3.106554970145225525e-02 2.313640852031539907e-02 2.030835604518355098e-02
28+
3.279158705845475197e-02 2.286542615718190063e-02 2.006608331755721508e-02
29+
3.450474794954061508e-02 2.227773085360240657e-02 1.988473577647376089e-02
30+
3.622867166996002197e-02 2.235026339866408307e-02 1.965232037206998683e-02
31+
3.794764261692762375e-02 2.381049907864962734e-02 2.007208385615655288e-02
32+
3.967674402520060539e-02 2.348512637161093153e-02 1.995687953488811672e-02
33+
3.967674402520060539e-02 1.930021165503603697e-02 1.853189387031761726e-02
34+
4.140168381854891777e-02 2.153502103146820446e-02 1.903802269298093819e-02
35+
4.313046485185623169e-02 2.127190873386552994e-02 1.883469660443459759e-02
36+
4.485950665548443794e-02 2.194866859724697861e-02 2.084183912876191869e-02
37+
4.485950665548443794e-02 2.097410646314301630e-02 1.863398574068586555e-02
38+
4.832960991188883781e-02 1.913441391525338986e-02 2.011174893985501200e-02
39+
4.832960991188883781e-02 2.038864287129626973e-02 1.822371188855331781e-02
40+
5.007123854011297226e-02 1.869157302422763678e-02 1.897896352179717638e-02
41+
5.007123854011297226e-02 2.013582126119217719e-02 1.798820008193402487e-02
42+
5.178315518423914909e-02 1.729498928534667357e-02 1.727152270823761882e-02
43+
5.178315518423914909e-02 1.987976615180286899e-02 1.775657524013052324e-02
44+
5.349761852994561195e-02 2.095557407676551520e-02 1.564196390515615320e-02
45+
5.349761852994561195e-02 1.950839892134581532e-02 1.777801562442604677e-02
46+
5.522092897444963455e-02 1.934404407995771180e-02 1.735890546269569512e-02
47+
5.693899467587471008e-02 1.905227861857383687e-02 1.711131758146677839e-02
48+
5.866472516208887100e-02 1.886431351141482082e-02 1.701231248461532175e-02
49+
5.866472516208887100e-02 1.729472241964913337e-02 1.875121042463234744e-02
50+
6.037651747465133667e-02 1.850343538239940244e-02 1.676669678932629637e-02
51+
6.209503859281539917e-02 1.824226726330380188e-02 1.653943678555691577e-02
52+
6.380691286176443100e-02 1.796967205240207477e-02 1.630535105948904828e-02
53+
6.552319461479783058e-02 1.762731252239291280e-02 1.612906670183988922e-02
54+
6.725320592522621155e-02 1.612301678426319995e-02 1.834827750008294345e-02
55+
6.725320592522621155e-02 1.740130096953862449e-02 1.591344645635395239e-02
56+
7.070347759872674942e-02 1.684264649588840257e-02 1.552526369103368609e-02
57+
7.242587953805923462e-02 1.665464982374942338e-02 1.530769102111140256e-02
58+
7.414547307416796684e-02 1.629046832533731504e-02 1.512059426369916082e-02
59+
7.586677372455596924e-02 1.610796512454726326e-02 1.487221909264402342e-02
60+
7.758291438221931458e-02 1.580181562093230241e-02 1.465097898424083667e-02
61+
7.930142758414149284e-02 1.555783249841624638e-02 1.453247144773150978e-02
62+
8.102221181616187096e-02 1.491277869467921846e-02 1.432974109948581543e-02
63+
8.274131407961249352e-02 1.495541910639985872e-02 1.397331627416953381e-02
64+
8.445633249357342720e-02 1.678697653375138543e-02 1.332771271581378869e-02
65+
8.445633249357342720e-02 1.471517263240684770e-02 1.377973346144134581e-02
66+
8.617100585252046585e-02 1.445145224101906933e-02 1.360054328271687041e-02
67+
8.788558747619390488e-02 1.416649749347698162e-02 1.332275135469984662e-02
68+
8.961289655417203903e-02 1.381566116208432504e-02 1.316317164811486862e-02
69+
9.133029030635952950e-02 1.171619477213425853e-02 1.448641364800629106e-02
70+
9.133029030635952950e-02 1.360109432073386415e-02 1.305911066979614787e-02
71+
9.303836058825254440e-02 1.096525548069848810e-02 1.402364132667077712e-02
72+
9.303836058825254440e-02 1.332628271916291851e-02 1.267142456625247604e-02
73+
9.475733712315559387e-02 1.306311985297270439e-02 1.249997288124937711e-02
74+
9.475733712315559387e-02 1.054540082105859256e-02 1.304450455508465723e-02
75+
9.647158952429890633e-02 1.277371285095796338e-02 1.230768962899109198e-02
76+
9.818239230662584305e-02 1.021462448943566415e-02 1.306707351316482857e-02
77+
9.818239230662584305e-02 1.248040079889278786e-02 1.211753158656936336e-02
78+
9.990139026194810867e-02 1.231240299694036366e-02 1.196918489176912459e-02
79+
1.016251887194812298e-01 1.191115759934291418e-02 1.175373822289582648e-02
80+
1.033431133255362511e-01 1.176774612531517050e-02 1.152692581023906371e-02
81+
1.033431133255362511e-01 1.254791993522985649e-02 9.586409755612201877e-03
82+
1.050651464611291885e-01 1.138453124940497219e-02 1.128599611646130541e-02
83+
1.077705682255327702e-01 8.818172713404237584e-03 1.213727600511838034e-02
84+
1.077705682255327702e-01 1.102146572264928182e-02 1.102311347426798704e-02
85+
1.094823838211596012e-01 1.075931224755777293e-02 1.081023948801806966e-02
86+
1.111959596164524555e-01 1.047363020575176051e-02 1.049524306815996511e-02
87+
1.129225464537739754e-01 1.023219122754426280e-02 1.035972145028196678e-02
88+
1.129225464537739754e-01 7.806814726279753813e-03 1.059719102084244469e-02
89+
1.146470378153026104e-01 9.895561382677442452e-03 1.011755214457554786e-02
90+
1.163756987079977989e-01 7.503417219254515658e-03 1.070313799742894645e-02
91+
1.163756987079977989e-01 9.626728626358271868e-03 9.940697997651959383e-03
92+
1.181075158528983593e-01 9.042882278663455509e-03 1.154306136269855898e-02
93+
1.181075158528983593e-01 1.153673352985151723e-02 8.798361952227828908e-03
94+
1.215520580299198627e-01 7.149568483725943224e-03 7.367898706205799897e-03
95+
1.215520580299198627e-01 8.812857091811565624e-03 9.321837984075820316e-03
96+
1.215520580299198627e-01 9.129983590355550405e-03 7.294259908858791164e-03
97+
1.215520580299198627e-01 1.032208354433805653e-02 1.027412244518721707e-02
98+
1.215520580299198627e-01 1.158086097086652444e-02 9.813283295946995111e-03
99+
1.232701162807643414e-01 6.461633515186804289e-03 1.088791108692532816e-02
100+
1.249823798425495625e-01 8.285438540099221427e-03 8.855767010404136386e-03
101+
1.266962508670985699e-01 8.039106184185129678e-03 8.646451496827189942e-03
102+
1.266962508670985699e-01 6.971229417956692487e-03 1.071102639013954416e-02
103+
1.266962508670985699e-01 5.911707123573251010e-03 7.755205982473256654e-03
104+
1.284227850846946239e-01 7.730076215409553697e-03 8.472234832517244740e-03
105+
1.301383790560066700e-01 8.162102891105860181e-03 1.032114311382326122e-02
106+
1.335909408517181873e-01 6.954442329174526094e-03 7.824982375267630630e-03
107+
1.353192110545933247e-01 6.321920005689207755e-03 5.709813180290623791e-03
108+
1.353192110545933247e-01 8.173654506890670746e-03 6.157683474786956879e-03
109+
1.353192110545933247e-01 7.947580693837608123e-03 8.325093673684058615e-03
110+
1.370473015122115612e-01 5.719969833421600924e-03 9.563867494305711148e-03
111+
1.370473015122115612e-01 4.374478946488125075e-03 6.227416106639793725e-03
112+
1.370473015122115612e-01 8.445825234616677335e-03 6.300595622119153916e-03
113+
1.424674922600388527e-01 4.921396694328450394e-03 9.353326207360623812e-03
114+
1.441986742429435253e-01 5.225015800647270225e-03 6.514815397718365375e-03
115+
1.441986742429435253e-01 7.460247184326362913e-03 7.441466848521649524e-03
116+
1.459179162047803402e-01 7.591837161839976034e-03 7.392394774996091655e-03
117+
1.476416094228625298e-01 4.725366208390369138e-03 6.131402432572485850e-03
118+
1.493742745369672775e-01 4.397597339902858948e-03 5.886265300440562953e-03
119+
1.510983277112245560e-01 4.131144021869204153e-03 5.708070755693128717e-03
120+
1.528125945478677750e-01 3.271472488108884136e-03 3.740612245571206529e-03
121+
1.545375376008450985e-01 1.430143828486052371e-03 4.857486839134672607e-03
122+
1.545375376008450985e-01 3.610998008298338391e-03 5.245112084462988378e-03
123+
1.562671926803886890e-01 4.785575301923472580e-03 3.233943011005990797e-03
124+
1.562671926803886890e-01 3.433450454394915141e-03 5.064693856626334423e-03
125+
1.579886912368237972e-01 6.088205404353175254e-04 4.296159128809762251e-03
126+
1.597071043215692043e-01 2.837731160866496793e-03 4.669130869461746158e-03
127+
1.597071043215692043e-01 4.749028814671873988e-03 5.222265398776571033e-03
128+
1.614367850124835968e-01 2.520870361138349836e-03 4.451305948180994676e-03
129+
1.631486713886260986e-01 7.988359512864917633e-04 6.432445849556600592e-03
130+
1.631486713886260986e-01 2.117762502791720181e-03 4.443460279281552516e-03
131+
1.648718635551631451e-01 1.918794542859814101e-03 3.556197959565743361e-03
132+
1.648718635551631451e-01 2.136074638144691562e-03 5.970114095314116298e-03
133+
1.683029700070619583e-01 1.511731473613053822e-03 3.607114290542945412e-03
134+
1.700285770930349827e-01 0.000000000000000000e+00 1.618839093693758002e-03
135+
1.700285770930349827e-01 1.193110891392734629e-03 3.422091504858393307e-03
136+
1.734727607108652592e-01 6.062010845084842003e-04 2.941514954142299132e-03
137+
1.751947007142007351e-01 1.833171461385063594e-04 0.000000000000000000e+00

examples/example.ipynb

Lines changed: 92 additions & 0 deletions
Large diffs are not rendered by default.

examples/example.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from robust_linear_regression import RobustLinearRegression
2+
import numpy as np
3+
import pathlib
4+
import os
5+
import matplotlib.pyplot as plt
6+
7+
def main():
    """Fit a robust linear regression to the bundled example data and plot
    the fit, the flagged outliers, and the robust-distance diagnostic."""
    samples = np.loadtxt(os.path.join(os.path.dirname(__file__), "data.txt"))
    model = RobustLinearRegression()
    x = samples[:, 0, None]  # predictor column, kept 2-D as fit() requires
    y = samples[:, 1:]       # two response columns
    model.fit(x, y)

    # Two x endpoints spanning the plotted range; predict() gives the line.
    line_x = np.array([[-0.02], [0.20]])
    line_y = model.predict(line_x)

    fig, axd = plt.subplot_mosaic([['y1', 'd'], ['y2', 'd']], dpi=150)

    keep = ~model.outliers
    drop = model.outliers

    # Top-left panel: first response vs. x.
    ax = axd["y1"]
    ax.scatter(x[keep, 0], y[keep, 0], s=2, label="inliers")
    ax.scatter(x[drop, 0], y[drop, 0], s=2, label="outliers")
    ax.plot(line_x, line_y[:, 0], color="k", zorder=-1, label="line")
    ax.legend()
    ax.set_xlabel("x")
    ax.set_ylabel("y_1")

    # Bottom-left panel: second response vs. x.
    ax = axd["y2"]
    ax.scatter(x[keep, 0], y[keep, 1], s=2, label="inliers")
    ax.scatter(x[drop, 0], y[drop, 1], s=2, label="outliers")
    ax.plot(line_x, line_y[:, 1], color="k", zorder=-1, label="line")
    ax.set_xlabel("x")
    ax.set_ylabel("y_2")
    ax.legend()

    # Right panel: robust residual distance vs. robust x distance, with the
    # chi-quantile cutoffs used for outlier classification.
    ax = axd["d"]
    ax.scatter(model.d_x, model.d_r, s=2)
    ax.axvline(model.d_x_threshold, color="k")
    ax.axhline(model.d_r_threshold, color="k")
    ax.set_xlabel("d(x)")
    ax.set_ylabel("d(r)")

    fig.align_ylabels()
    fig.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
numpy
2+
scipy
3+
scikit-learn
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .rlr import *

robust_linear_regression/rlr.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import numpy as np
2+
import scipy
3+
from scipy.stats import chi
4+
from sklearn.covariance import MinCovDet
5+
6+
class RobustLinearRegression():
    """Robust multiple/multivariate linear regression via the minimum
    covariance determinant (MCD).

    Estimates Y = X @ beta + alpha from the robust location and scatter of
    the joint [X, Y] sample, following Rousseeuw et al. (2004),
    Technometrics 46:3, 293-305.
    """

    def __init__(self, support_fraction=None):
        # Fraction of points MinCovDet uses for its raw estimate;
        # None lets scikit-learn pick its default.
        self.support_fraction = support_fraction
        # Populated by fit(); predict() refuses to run until they are set.
        # (Previously these were never initialized, so calling predict()
        # before fit() raised AttributeError instead of the intended error.)
        self.beta = None
        self.alpha = None

    def fit(self, X, Y):
        """Fit the regression.

        X is (n, d_x) and Y is (n, d_y); both must be 2-D with the same
        number of rows. Raises Exception on a row-count mismatch or when
        the resulting robust distances contain NaNs.
        """
        self.X = X
        self.Y = Y
        self.n, self.x_dim = self.X.shape
        Y_n, self.y_dim = self.Y.shape
        if self.n != Y_n:
            raise Exception("X and Y must have the same number of points")

        # Robust distances (not squared) are compared against chi quantiles,
        # i.e. sqrt of the usual chi-squared cutoffs.
        self.d_r_threshold = chi.ppf(0.975, df=self.y_dim)
        self.d_x_threshold = chi.ppf(0.975, df=self.x_dim)

        self.Z = np.hstack([self.X, self.Y])

        # Min-max scale each column to [0, 1] for numerical stability of the
        # location/covariance estimate; the affine map (A, b) is inverted
        # below to recover estimates in the original units.
        shift_Z = - self.Z.min(axis=0) / (self.Z.max(axis=0) - self.Z.min(axis=0))
        scale_Z = 1 / (self.Z.max(axis=0) - self.Z.min(axis=0))
        A = np.diag(scale_Z)
        b = shift_Z
        self.Z = self.Z @ A + b

        mcd = MinCovDet(support_fraction=self.support_fraction)
        mcd.fit(self.Z)

        # Undo the scaling: mu = (loc - b) A^{-1}, covar = A^{-T} C A^{-1}.
        self.mu = (mcd.location_ - b) @ np.linalg.inv(A)
        self.covar = np.linalg.inv(A.T) @ mcd.covariance_ @ np.linalg.inv(A)

        self.mu_x = self.mu[:self.x_dim]
        self.mu_y = self.mu[self.x_dim:]

        # Partition the joint covariance into x/x, x/y, and y/y blocks.
        self.sigma_xx = self.covar[:self.x_dim, :self.x_dim]
        self.sigma_xy = self.covar[:self.x_dim, self.x_dim:]
        self.sigma_yy = self.covar[self.x_dim:, self.x_dim:]

        # Slope and error covariance from the partitioned covariance;
        # pinvh tolerates a (near-)singular sigma_xx.
        self.beta = scipy.linalg.pinvh(self.sigma_xx) @ self.sigma_xy
        self.sigma_e = self.sigma_yy - self.beta.T @ self.sigma_xx @ self.beta

        self.alpha = self.mu_y - self.beta.T @ self.mu_x

        y_hat = self.predict(self.X)
        self.r = self.Y - y_hat

        # Robust Mahalanobis-style distances of residuals and predictors.
        self.d_r = ((self.r @ scipy.linalg.pinvh(self.sigma_e) * self.r).sum(1))**0.5
        self.d_x = (((self.X - self.mu_x) @ scipy.linalg.pinvh(self.sigma_xx) * (self.X - self.mu_x)).sum(1))**0.5

        if np.any(np.isnan(self.d_r)) or np.any(np.isnan(self.d_x)):
            raise Exception("Regression produced invalid robust distances")
        # Points are flagged as outliers by residual distance alone;
        # good_leverage_points marks x-values within the x-distance cutoff.
        self.residual_outliers = self.d_r > self.d_r_threshold
        self.good_leverage_points = self.d_x < self.d_x_threshold
        self.outliers = self.residual_outliers

    def predict(self, x):
        """Predict responses for x of shape (n, d_x). Requires a prior fit()."""
        if self.beta is None or self.alpha is None:
            raise Exception("Must fit regressor to data!")
        return x @ self.beta + self.alpha

setup.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import setuptools
2+
3+
# Runtime dependencies, mirroring requirements.txt.
requirements = [
    "numpy",
    "scipy",
    "scikit-learn",
]

# Package metadata for pip/setuptools installation.
setuptools.setup(
    name="robust_linear_regression",
    version="0.0.1",
    author="Steven Stetzler",
    author_email="[email protected]",
    description=(
        "Robust linear regression in the multiple regression and "
        "multivariate regression cases using the minimum covariance "
        "determinant"
    ),
    packages=setuptools.find_packages(where="."),
    install_requires=requirements,
)

0 commit comments

Comments
 (0)