Skip to content

Commit 422dde7

Browse files
Added files associated to the Summer School of ICORS2025
1 parent 77fac21 commit 422dde7

File tree

8 files changed

+524
-0
lines changed

8 files changed

+524
-0
lines changed

ICORS2025summerSchool/MregEASY.m

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
function outIRWLS = mregEASY(y,X,initialbeta,c,initialscale)
2+
%IRWLSregS (iterative reweighted least squares) does refsteps refining steps from initialbeta for S estimator
3+
%
4+
% Required input arguments:
5+
%
6+
% y: A vector with n elements that contains the response variable.
7+
% It can be both a row or column vector.
8+
% X : Data matrix of explanatory variables (also called 'regressors')
9+
% of dimension (n x p). Rows of X represent observations, and
10+
% columns represent variables.
11+
% initialbeta : p x 1 vector containing initial estimate of beta
12+
% c : consistency factor
13+
%
14+
% Optional input arguments:
15+
%
16+
% initialscale: scalar, initial estimate of the scale. If not defined,
17+
% scaled MAD of residuals is used.
18+
%
19+
% Output:
20+
%
21+
% The output consists of a structure 'outIRWLS' containing the following fields:
22+
% betarw : p x 1 vector. Estimate of beta after refsteps refining steps
23+
% scalerw : scalar. Estimate of scale after refsteps refining step
24+
% weights : n x 1 vector. Weights assigned to each observation
25+
%
26+
% In the IRWLS procedure the value of beta and the value of the scale are
27+
% updated in each step
28+
29+
%% Beginning of code
30+
delta=0.199;
31+
32+
% Residuals for the initialbeta
33+
res = y - X * initialbeta;
34+
35+
% The scaled MAD of residuals is the initial scale estimate default value
36+
if (nargin < 4)
37+
initialscale = median(abs(res))/.6745;
38+
end
39+
40+
beta = initialbeta;
41+
scale = initialscale;
42+
reftol=1e-7;
43+
refsteps=100;
44+
iter = 0;
45+
betadiff = 9999;
46+
47+
while ( (betadiff > reftol) && (iter < refsteps) )
48+
iter = iter + 1;
49+
50+
% Solve for the scale (do just one iteration)
51+
meanrho=mean(TBrho(res/scale,c));
52+
% new scale = old scale *sqrt (mean(rho)/delta)
53+
scale = scale * sqrt(meanrho / delta );
54+
55+
% Compute n x 1 vector of weights (using TB)
56+
57+
weights = TBwei(res/scale,c);
58+
59+
sqweights = weights.^(1/2);
60+
61+
% Xw = [X(:,1) .* sqweights X(:,2) .* sqweights ... X(:,end) .* sqweights]
62+
Xw = bsxfun(@times, X, sqweights);
63+
yw = y .* sqweights;
64+
65+
% estimate of beta from (re)weighted regression (RWLS)
66+
newbeta = Xw\yw;
67+
68+
%newbeta= inv(Xw'*Xw)*Xw'*yw;
69+
% Inefficient way of obtaining updated estimated of beta
70+
% inv(X'*diag(weights)*X)*X'*diag(weights)*y
71+
72+
73+
% exit from the loop if the new beta has singular values. In such a
74+
% case, any intermediate estimate is not reliable and we can just
75+
% keep the initialbeta and initial scale.
76+
if (any(isnan(newbeta)))
77+
newbeta = initialbeta;
78+
scale = initialscale;
79+
weights = NaN;
80+
break
81+
end
82+
83+
% betadiff is linked to the tolerance (specified in scalar reftol)
84+
betadiff = norm(beta - newbeta,1) / norm(beta,1);
85+
86+
% update residuals and beta
87+
res = y - X * newbeta;
88+
beta = newbeta;
89+
90+
end
91+
92+
% store final estimate of beta
93+
outIRWLS.betarw = newbeta;
94+
% store final estimate of scale
95+
outIRWLS.scalerw = scale;
96+
% store final estimate of the weights for each observation
97+
outIRWLS.weights=weights;
98+
end
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
%% Example of M estimators in linear regression
2+
rng(1234)
3+
n=200;
4+
p=5;
5+
% data contamination (20 per cent)
6+
outl=1:40;
7+
X=[ones(n,1) randn(n,p-1)];
8+
sig=0.02;
9+
y=X*3*ones(p,1)+sig*randn(n,1);
10+
% Point mass contamination for 20% of the observations
11+
y(outl)=-20;
12+
X(outl,2:end)=2;
13+
group=ones(n,1);
14+
group(outl)=2;
15+
% plot the data and show the point mass contamination
16+
yXplot(y,X,group);
17+
18+
%% Set initial values and call mregEASY
19+
initialbeta=randn(p,1);
20+
initialscale=mad(y,1)/0.675;
21+
c=1.547;
22+
out=MregEASY(y,X,initialbeta,c,initialscale);
23+
resM=y-X*out.betarw;
24+
resindexplot(resM)
25+
26+
%% Compare with non robust fit
27+
outOLS=fitlm(X(:,2:end),y);
28+
disp(outOLS)
29+
resindexplot(outOLS.Residuals{:,2})
30+
31+
%% Apply automatic outlier detection procedure based on the forward search
32+
X1=X(:,2:end);
33+
FSR(y,X1)
34+
35+
%% Forward search with exploratory purposes
36+
outLXS=LXS(y,X1);
37+
outFS=FSReda(y,X1,outLXS.bs);
38+
resfwdplot(outFS,'databrush',1)
39+

ICORS2025summerSchool/MscaleEASY.m

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
function sc = MscaleEASY(u, c, initialsc, tol, maxiter)
2+
%Mscale finds the M estimator of the scale
3+
%
4+
%
5+
%<a href="matlab: docsearchFS('Mscale')">Link to the help function</a>
6+
%
7+
% Required input arguments:
8+
%
9+
% u: : residuals or Mahalanobis distances. Vector.
10+
% n x 1 vector which contains the scaled residuals or
11+
% Mahalanobis distances
12+
% Data Types - single | double
13+
% psifunc.c1 = consistency factor (and other parameters)
14+
% associated to required breakdown point or nominal
15+
% efficiency.
16+
% More precisely, psifunc.c1(1) contains consistency
17+
% factor associated to required breakdown point or
18+
% nominal efficiency psifunc.c1(2:end) contain other
19+
% parameters associated with the rho (psi) function.
20+
% Example - psifunc.class='TB';psifunc.c1=1.5476;psifunc.kc1=0.1996
21+
% Data Types - struct
22+
%
23+
% Optional input arguments:
24+
%
25+
% initialsc: scalar. The initial estimate of the scale.
26+
% If not defined, scaled MAD of vector |u| is used.
27+
% Example - 'initialsc',0.34
28+
% Data Types - double
29+
% tol : scalar. The tolerance for controlling convergence.
30+
% If not defined, tol is fixed to 1e-7.
31+
% Example - 'tol',1e-10
32+
% Data Types - double
33+
% maxiter : scalar. Maximum number of iterations to find the scale.
34+
% If not defined, maxiter is fixed to 200.
35+
% Example - 'maxiter',100
36+
% Data Types - double
37+
%
38+
% Output:
39+
%
40+
% sc : M-estimate of the scale. Scalar.
41+
% Robust M estimate of scale.
42+
% This routine is called by Taureg.m and Sreg.m
43+
%
44+
% More About:
45+
%
46+
% u = residuals or Mahalanobis distances
47+
% (note that u is kept fixed in each iteration).
48+
% Remark: the M estimator of scale must satisfy the following equation
49+
% \[
50+
% (1/n) \sum_{i=1}^n \rho((u_i/c)/s) = kc
51+
% \]
52+
%
53+
% This routine computes the value of $s$ which satisfies the above
54+
% equation.
55+
%
56+
% See also: Mscale1, minscale
57+
%
58+
% References:
59+
%
60+
% Huber P. and Ronchetti E. (2009), Robust Statistics, Wiley
61+
% (equation 7.119, p. 176).
62+
%
63+
%
64+
% Copyright 2008-2017.
65+
% Written by FSDA team
66+
%
67+
%
68+
%<a href="matlab: docsearchFS('Mscale')">Link to the help page for this function</a>
69+
%
70+
%$LastChangedDate:: 2017-11-17 15:01:40 #$: Date of the last commit
71+
%
72+
% Examples:
73+
74+
%{
75+
% Example of M estimate of scale.
76+
% M estimate of the scale using Tukey biweight rho function with a
77+
% value of c associated to a breakdown point of 0.5.
78+
psifunc=struct;
79+
psifunc.class='TB';
80+
bdp=0.5;
81+
c=TBbdp(bdp,1);
82+
% kc = E(rho) = sup(rho)*bdp
83+
kc=c^2/6*bdp;
84+
psifunc.c1=c;
85+
psifunc.kc1=kc;
86+
n=10000;
87+
shift=5;
88+
u=2*randn(n,1);
89+
u(1:10)=u(1:10)+shift;
90+
s=Mscale(u,psifunc)
91+
%}
92+
93+
%{
94+
% Estimate of scale using Hampel rho function.
95+
% M estimate of the scale using Hampel rho function with a
96+
% value of c associated to a breakdown point of 0.5
97+
psifunc=struct;
98+
psifunc.class='HA'
99+
abc=[1.5 3.5 8];
100+
bdp=0.5;
101+
c=HAbdp(bdp,1,abc);
102+
% kc = E(rho) = sup(rho)*bdp
103+
kc=HArho(c*abc(3),[c, abc])*bdp;
104+
psifunc.c1=[c abc];
105+
psifunc.kc1=kc;
106+
n=10000;
107+
shift=5;
108+
u=3*randn(n,1);
109+
u(1:10)=u(1:10)+shift;
110+
s=Mscale(u,psifunc)
111+
%}
112+
113+
%% Beginning of code
114+
115+
K=0.1996;
116+
117+
% M-estimator of scale using the requested rho function.
118+
119+
if nargin<5
120+
maxiter = 200;
121+
end
122+
123+
if nargin<4
124+
tol = 1e-7;
125+
end
126+
127+
if nargin<3
128+
sc=median(abs(u))/.6745;
129+
else
130+
sc=initialsc;
131+
end
132+
133+
loop = 0;
134+
err = 1;
135+
while (( loop < maxiter ) && (err > tol))
136+
% scale step: see equation 7.119 of Huber and Ronchetti, p. 176
137+
% scalenew = scaleold *(1/n)*\sum \rho(u_i/scaleold) / kc
138+
scnew = sc*sqrt( mean(TBrho(u/sc,c)) / K);
139+
err = abs(scnew/sc - 1);
140+
sc = scnew;
141+
% disp(sc)
142+
loop = loop+1;
143+
end
144+
% disp(loop)
145+
% sc=sc;
146+
end
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
%% Example of robust estimate of scale
2+
rng(1)
3+
nout=5;
4+
n=100;
5+
u=[randn(n,1); 200*ones(nout,1)];
6+
est=MscaleEASY(u,1.5476);
7+
8+
9+
%% Simulation study to check the distribution of robust estimate of scale
10+
rng(1)
11+
nout=5;
12+
n=1000;
13+
nsimul=10000;
14+
est=zeros(nsimul,1);
15+
for i=1:nsimul
16+
%u=randn(n,1);
17+
u=2*[randn(n,1); 200*ones(nout,1)];
18+
19+
est(i)=MscaleEASY(u,1.5476);
20+
end
21+
22+
boxplot(est)
23+

ICORS2025summerSchool/TBbdpEASY.m

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
function c = TBbdpEASY(bdp)
2+
%TBbdp finds the constant c associated to the supplied breakdown point for Tukey's biweight
3+
% The constant is found through a dichotomic search
4+
%
5+
%
6+
%<a href="matlab: docsearchFS('TBbdpEASY')">Link to the help function</a>
7+
%
8+
% Required input arguments:
9+
%
10+
% bdp : breakdown point. Scalar. Scalar defining breakdown point
11+
% (i.e a number between 0 and 0.5)
12+
% Data Types - single|double
13+
%
14+
% Optional input arguments:
15+
%
16+
% Output:
17+
%
18+
% c : Requested tuning constant. Scalar. Tuning constatnt of Tukey Biweight
19+
% function associated to requested breakdown point
20+
%
21+
%
22+
% See also: OPTbdp, HYPbdp, HAbdp
23+
%
24+
% References:
25+
%
26+
% Atkinson et al. (2025),
27+
%
28+
% Copyright 2008-2025.
29+
% Written by FSDA team
30+
%
31+
%
32+
%<a href="matlab: docsearchFS('TBbdpEASY')">Link to the help page for this function</a>
33+
%
34+
%
35+
%
36+
% Examples:
37+
%
38+
%{
39+
% Find c given bdp.
40+
% The constant c associated to a breakdown point of 50% in regression is
41+
% c=1.547644980928226
42+
c=TBbdpEASY(0.5)
43+
%}
44+
%
45+
46+
%% Beginning of code
47+
48+
% c = starting point of the iteration
49+
c=5;
50+
% step = width of the dichotomic search (it decreases by half at each
51+
% iteration). Generally it can be smaller. A large value ensures converge
52+
% when bdp is very small and p is very large.
53+
step=200;
54+
55+
% Convergence condition is E(\rho) = \rho(c) bdp
56+
% where \rho(c) for TB is c^2/6
57+
Erho1=10;
58+
eps=1e-11;
59+
while abs(Erho1-1)>eps
60+
61+
c2=c.^2;
62+
63+
Erho= (chi2cdf(c2,3)/2-3*chi2cdf(c2,5)./(2*c2)+...
64+
+15*chi2cdf(c2,7)./(6*(c.^4))+ ((c.^2)/3).*(1-normcdf(c)));
65+
66+
Erho1=(Erho./(c.^2))*(6/bdp);
67+
68+
step=step/2;
69+
if Erho1>1
70+
c=c+step;
71+
else
72+
c=max(c-step,0.1);
73+
end
74+
disp([step c Erho1])
75+
end
76+
end
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
% Find constant c associated to bdp=0.5 for TB
2+
3+
c=TBbdpEASY(0.5);

0 commit comments

Comments
 (0)