Added files associated to the Summer School of ICORS2025

MarcoRianiUNIPR · MarcoRianiUNIPR · commit 422dde7752db · 2025-05-14T23:21:07.000+02:00
diff --git a/ICORS2025summerSchool/MregEASY.m b/ICORS2025summerSchool/MregEASY.m
@@ -0,0 +1,98 @@
+function outIRWLS = mregEASY(y,X,initialbeta,c,initialscale)
+%mregEASY refines an initial estimate of beta through iterative reweighted least squares (IRWLS)
+%
+% Starting from initialbeta, the scale and the vector of regression
+% coefficients are updated in turn, using Tukey's biweight (functions TBrho
+% and TBwei), until the relative change of beta is not greater than 1e-7
+% or 100 refining steps have been done.
+%
+% Remark: this function is stored in file MregEASY.m, therefore it has to
+% be called as MregEASY(...) (MATLAB identifies a function through the
+% name of its file).
+%
+%  Required input arguments:
+%
+%    y:         A vector with n elements that contains the response variable.
+%               It can be both a row or column vector.
+%    X :        Data matrix of explanatory variables (also called 'regressors')
+%               of dimension (n x p). Rows of X represent observations, and
+%               columns represent variables.
+% initialbeta : p x 1 vector containing initial estimate of beta
+%   c  :        scalar, tuning constant passed to TBrho and TBwei
+%
+%  Optional input arguments:
+%
+% initialscale: scalar, initial estimate of the scale. If not defined
+%               (or empty), scaled MAD of the residuals from initialbeta
+%               is used.
+%
+%  Output:
+%
+%  The output consists of a structure 'outIRWLS' containing the following fields:
+%      betarw  : p x 1 vector. Estimate of beta after the refining steps
+%     scalerw  : scalar. Estimate of scale after the refining steps
+%     weights  : n x 1 vector. Weights assigned to each observation.
+%
+%  If a refining step produces NaN coefficients, the loop stops and
+%  betarw=initialbeta, scalerw=initialscale, weights=NaN are returned.
+%
+% In the IRWLS procedure the value of beta and the value of the scale are
+% updated in each step
+
+%% Beginning of code
+
+% Right hand side of the M-scale equation mean(rho(res/scale)) = delta.
+% NOTE(review): delta is fixed, it does not change with the supplied c.
+delta=0.199;
+
+% y can be a row or a column vector: always work with a column, otherwise
+% y - X*initialbeta is not a n x 1 vector of residuals.
+y = y(:);
+
+% Residuals for the initialbeta
+res = y - X * initialbeta;
+
+% The scaled MAD of residuals is the initial scale estimate default value.
+% initialscale is the 5th input argument, therefore the default is needed
+% whenever fewer than 5 arguments are supplied.
+if nargin < 5 || isempty(initialscale)
+    initialscale = median(abs(res))/.6745;
+end
+
+beta = initialbeta;
+scale = initialscale;
+% reftol = tolerance on the relative change of beta
+reftol=1e-7;
+% refsteps = maximum number of refining steps
+refsteps=100;
+iter = 0;
+% Large starting value which guarantees at least one refining step
+betadiff = 9999;
+
+while ( (betadiff > reftol) && (iter < refsteps) )
+    iter = iter + 1;
+    
+    % Solve for the scale (do just one iteration)
+    meanrho=mean(TBrho(res/scale,c));
+    % new scale = old scale *sqrt (mean(rho)/delta)
+    scale = scale * sqrt(meanrho / delta );
+    
+    % Compute n x 1 vector of weights (using TB)
+    weights = TBwei(res/scale,c);
+    
+    sqweights = sqrt(weights);
+    
+    % Xw = [X(:,1) .* sqweights X(:,2) .* sqweights ... X(:,end) .* sqweights]
+    Xw = bsxfun(@times, X, sqweights);
+    yw = y .* sqweights;
+    
+    % estimate of beta from (re)weighted regression (RWLS).
+    % Equivalent (but less efficient) expressions are
+    % inv(Xw'*Xw)*Xw'*yw  and  inv(X'*diag(weights)*X)*X'*diag(weights)*y
+    newbeta = Xw\yw;
+    
+    % exit from the loop if the new beta contains NaN. In such a
+    % case, any intermediate estimate is not reliable and we can just
+    % keep the initialbeta and initial scale.
+    if (any(isnan(newbeta)))
+        newbeta = initialbeta;
+        scale = initialscale;
+        weights = NaN;
+        break
+    end
+    
+    % betadiff is linked to the tolerance (specified in scalar reftol)
+    betadiff = norm(beta - newbeta,1) / norm(beta,1);
+    
+    % update residuals and beta
+    res = y - X * newbeta;
+    beta = newbeta;
+    
+end
+
+% store final estimate of beta
+outIRWLS.betarw = newbeta;
+% store final estimate of scale
+outIRWLS.scalerw = scale;
+% store final estimate of the weights for each observation
+outIRWLS.weights=weights;
+end
diff --git a/ICORS2025summerSchool/MregEASYmain.m b/ICORS2025summerSchool/MregEASYmain.m
@@ -0,0 +1,39 @@
+%% Example of M estimators in linear regression
+% Simulate a regression data set with 20 per cent of point mass
+% contamination, fit it with the IRWLS routine stored in MregEASY.m and
+% compare the result with ordinary least squares and with forward search
+% tools.
+% Fix the seed of the random number generator (reproducible data)
+rng(1234)
+% n = number of observations
+n=200;
+% p = number of columns of X (column of ones included)
+p=5;
+% data contamination (20 per cent)
+outl=1:40;
+% First column of X is the intercept, the other p-1 columns are N(0,1)
+X=[ones(n,1) randn(n,p-1)];
+% sig = standard deviation of the Gaussian noise added to the response
+sig=0.02;
+% All the p regression coefficients are equal to 3
+y=X*3*ones(p,1)+sig*randn(n,1);
+% Point mass contamination for 20% of the observations
+y(outl)=-20; 
+X(outl,2:end)=2;
+% group = 1 for the uncontaminated units and 2 for the contaminated ones
+group=ones(n,1);
+group(outl)=2;
+% plot the data and show the point mass contamination
+% (yXplot is not defined in this folder; presumably it comes from the
+% FSDA toolbox)
+yXplot(y,X,group);
+
+%% Set initial values and call mregEASY
+% Random starting value for beta
+initialbeta=randn(p,1);
+% Initial scale: median absolute deviation of the response y (not of the
+% residuals) divided by 0.675
+initialscale=mad(y,1)/0.675;
+% c = tuning constant passed to MregEASY.
+% NOTE(review): the help of TBbdpEASY quotes c=1.5476... for bdp=0.5, this
+% looks like that value truncated -- confirm
+c=1.547;
+out=MregEASY(y,X,initialbeta,c,initialscale);
+% Residuals from the robust fit
+resM=y-X*out.betarw;
+% resindexplot is not defined in this folder (presumably FSDA toolbox)
+resindexplot(resM)
+
+%% Compare with non robust fit
+% The column of ones is dropped because fitlm adds the intercept by default
+outOLS=fitlm(X(:,2:end),y);
+disp(outOLS)
+% Second column of table outOLS.Residuals (Pearson residuals in the
+% layout documented for fitlm)
+resindexplot(outOLS.Residuals{:,2})
+
+%% Apply automatic outlier detection procedure based on the forward search
+% X1 = explanatory variables without the column of ones
+X1=X(:,2:end);
+% No semicolon: the output of FSR is shown in the command window
+FSR(y,X1)
+
+%% Forward search with exploratory purposes
+% NOTE(review): outLXS.bs is presumably the subset of units from which
+% the search in FSReda starts -- confirm in the FSDA documentation
+outLXS=LXS(y,X1);
+outFS=FSReda(y,X1,outLXS.bs);
+resfwdplot(outFS,'databrush',1)
+
diff --git a/ICORS2025summerSchool/MscaleEASY.m b/ICORS2025summerSchool/MscaleEASY.m
@@ -0,0 +1,146 @@
+function sc = MscaleEASY(u, c, initialsc, tol, maxiter, kc)
+%MscaleEASY finds the M estimator of the scale using Tukey's biweight rho function
+%
+% Simplified version of FSDA function Mscale in which the rho function is
+% always Tukey's biweight (function TBrho).
+%
+%<a href="matlab: docsearchFS('Mscale')">Link to the help function of Mscale</a>
+%
+% Required input arguments:
+%
+%    u:       : residuals or Mahalanobis distances. Vector.
+%               n x 1 vector which contains the scaled residuals or
+%               Mahalanobis distances
+%               Data Types - single | double
+%    c:       : tuning constant of the rho function. Scalar.
+%               Value which is passed to TBrho as second argument.
+%               Example - 1.5476
+%               Data Types - single | double
+%
+%  Optional input arguments:
+%
+%  Optional arguments are positional. Use [] to skip one of them and keep
+%  its default value.
+%
+%    initialsc: scalar. The initial estimate of the scale.
+%               If not defined (or empty), scaled MAD of vector |u| is used.
+%               Example - 0.34
+%               Data Types - double
+%     tol     : scalar. The tolerance for controlling convergence.
+%               The iterations stop when |scnew/sc - 1| <= tol.
+%               If not defined (or empty), tol is fixed to 1e-7.
+%               Example - 1e-10
+%               Data Types - double
+%     maxiter : scalar. Maximum number of iterations to find the scale.
+%               If not defined (or empty), maxiter is fixed to 200.
+%               Example - 100
+%               Data Types - double
+%     kc      : scalar. Right hand side of the equation which defines the
+%               M estimator of scale (see section More About).
+%               If not defined (or empty), kc is fixed to 0.1996.
+%               The examples below use kc = c^2/6*bdp where bdp is the
+%               breakdown point (for c=1.5476 and bdp=0.5 this gives 0.1996).
+%               Example - 0.1996
+%               Data Types - double
+%
+%  Output:
+%
+%  sc : M-estimate of the scale. Scalar.
+%       Robust M estimate of scale.
+%
+% More About:
+%
+% u = residuals or Mahalanobis distances
+% (note that u is kept fixed in each iteration).
+% Remark: the M estimator of scale must satisfy the following equation
+% \[
+%  (1/n) \sum_{i=1}^n \rho_c(u_i/s) = kc
+% \]
+% where $\rho_c$ is the rho function with tuning constant $c$.
+%
+% This routine computes the value of $s$ which satisfies the above
+% equation using the iteration
+% \[
+%  s_{new} = s_{old} \sqrt{ (1/n) \sum_{i=1}^n \rho_c(u_i/s_{old}) / kc }
+% \]
+%
+% See also: Mscale1, minscale
+%
+% References:
+%
+% Huber P. and Ronchetti E. (2009), Robust Statistics, Wiley 
+% (equation 7.119,  p. 176).
+%
+%
+% Copyright 2008-2017.
+% Written by FSDA team
+%
+%
+%$LastChangedDate:: 2017-11-17 15:01:40 #$: Date of the last commit
+%
+% Examples:
+
+%{
+    % Example of M estimate of scale.
+    % M estimate of the scale using Tukey biweight rho function with a
+    % value of c associated to a breakdown point of 0.5 (default kc).
+    c=1.5476;
+    n=10000;
+    shift=5;
+    u=2*randn(n,1);
+    u(1:10)=u(1:10)+shift;
+    s=MscaleEASY(u,c)
+%}
+
+%{
+    % Estimate of scale supplying kc.
+    % M estimate of the scale using Tukey biweight rho function with a
+    % value of c associated to a breakdown point of 0.25.
+    bdp=0.25;
+    c=TBbdpEASY(bdp);
+    % kc = E(rho) = sup(rho)*bdp
+    kc=c^2/6*bdp;
+    n=10000;
+    shift=5;
+    u=3*randn(n,1);
+    u(1:10)=u(1:10)+shift;
+    s=MscaleEASY(u,c,[],[],[],kc)
+%}
+
+%% Beginning of code
+
+% M-estimator of scale using Tukey's biweight rho function.
+
+% The checks on nargin come first in each condition, so that an argument
+% which has not been supplied is never referenced.
+if nargin<6 || isempty(kc)
+    kc = 0.1996;
+end
+
+if nargin<5 || isempty(maxiter)
+    maxiter = 200;
+end
+
+if nargin<4 || isempty(tol)
+    tol = 1e-7;
+end
+
+if nargin<3 || isempty(initialsc)
+    sc=median(abs(u))/.6745;
+else
+    sc=initialsc;
+end
+
+loop = 0;
+% err = relative change of the scale (initialized to enter the loop)
+err = 1;
+while  (( loop < maxiter ) && (err > tol))
+    % scale step: see equation 7.119 of Huber and Ronchetti, p. 176
+    % scalenew = scaleold * sqrt( (1/n)*\sum  \rho(u_i/scaleold) / kc )
+    scnew = sc*sqrt( mean(TBrho(u/sc,c)) / kc);
+    err = abs(scnew/sc - 1);
+    sc = scnew;
+    loop = loop+1;
+end
+end
diff --git a/ICORS2025summerSchool/MscaleEASYmain.m b/ICORS2025summerSchool/MscaleEASYmain.m
@@ -0,0 +1,23 @@
+%% Example of robust estimate of scale
+% The sample is made up of n values from N(0,1) plus nout outliers all
+% equal to 200. The value 1.5476 is passed as argument c of MscaleEASY.
+rng(1)
+% nout = number of outliers appended to the sample
+nout=5;
+% n = number of uncontaminated observations
+n=100;
+u=[randn(n,1); 200*ones(nout,1)];
+est=MscaleEASY(u,1.5476);
+
+
+%% Simulation study to check the distribution of robust estimate of scale
+% In each of the nsimul replicates the sample is generated as above and
+% then multiplied by 2 (the uncontaminated part has standard deviation 2
+% and the outliers are equal to 400). The nsimul estimates of the scale
+% are stored in vector est and summarized through a boxplot.
+rng(1)
+nout=5;
+n=1000;
+% nsimul = number of simulations
+nsimul=10000;
+% Preallocate the vector which will contain the estimates
+est=zeros(nsimul,1);
+for i=1:nsimul
+    % Alternative without contamination (left commented out)
+    %u=randn(n,1);
+    u=2*[randn(n,1); 200*ones(nout,1)];
+    
+    est(i)=MscaleEASY(u,1.5476);
+end
+
+boxplot(est)
+
diff --git a/ICORS2025summerSchool/TBbdpEASY.m b/ICORS2025summerSchool/TBbdpEASY.m
@@ -0,0 +1,76 @@
+function c = TBbdpEASY(bdp)
+%TBbdpEASY finds the constant c associated to the supplied breakdown point for Tukey's biweight
+% The constant is found through a dichotomic search
+%
+%
+%<a href="matlab: docsearchFS('TBbdpEASY')">Link to the help function</a>
+%
+%  Required input arguments:
+%
+%      bdp    : breakdown point. Scalar. Scalar defining breakdown point
+%               (i.e a number between 0 and 0.5).
+%               An error is produced if bdp is not a scalar strictly
+%               between 0 and 1.
+%               Data Types - single|double
+%
+%  Optional input arguments:
+%
+% Output:
+%
+%  c : Requested tuning constant. Scalar. Tuning constant of Tukey Biweight
+%         function associated to requested breakdown point.
+%         The search starts from c=5, uses steps 100, 50, 25, ... and never
+%         goes below 0.1. If the requested constant is outside the range
+%         which can be reached, a warning is produced and the last value
+%         of c is returned.
+%
+%  The vector [step c Erho1] is displayed at each iteration in order to
+%  show how the search proceeds.
+%
+% See also: OPTbdp, HYPbdp, HAbdp
+%
+% References:
+%
+% Atkinson et al. (2025), 
+%
+% Copyright 2008-2025.
+% Written by FSDA team
+%
+%
+%<a href="matlab: docsearchFS('TBbdpEASY')">Link to the help page for this function</a>
+%
+%
+%
+% Examples:
+%
+%{
+    % Find c given bdp.
+    % The constant c associated to a breakdown point of 50% in regression is
+    % c=1.547644980928226
+    c=TBbdpEASY(0.5)
+%}
+%
+
+%% Beginning of code
+
+% Without this check a non positive bdp produces an endless loop and
+% bdp=NaN silently returns the starting value of c.
+if ~(isnumeric(bdp) && isscalar(bdp) && bdp>0 && bdp<1)
+    error('FSDA:TBbdpEASY:WrongInput', ...
+        'bdp must be a scalar strictly between 0 and 1.');
+end
+
+% c = starting point of the iteration
+c=5;
+% step = width of the dichotomic search (it decreases by half at each
+% iteration). Generally it can be smaller. A large value ensures
+% convergence when bdp is very small (that is when c is very large).
+step=200;
+
+% Convergence condition is E(\rho) = \rho(c) bdp
+%  where \rho(c) for TB is c^2/6
+% Erho1 = E(\rho)/(\rho(c) bdp) is equal to 1 at convergence
+Erho1=10;
+% tol = tolerance on |Erho1-1| (the name eps is not used in order not to
+% hide the MATLAB function eps which is called below)
+tol=1e-11;
+while abs(Erho1-1)>tol
+    
+    c2=c.^2;
+    
+    % Expectation of rho under the standard normal distribution
+    Erho= (chi2cdf(c2,3)/2-3*chi2cdf(c2,5)./(2*c2)+...
+        +15*chi2cdf(c2,7)./(6*(c.^4))+ ((c.^2)/3).*(1-normcdf(c)));
+    
+    Erho1=(Erho./(c.^2))*(6/bdp);
+    
+    step=step/2;
+    
+    % If step is smaller than the spacing of floating point numbers around
+    % c, the search cannot move any more: stop instead of looping forever
+    if step<eps(c)
+        warning('FSDA:TBbdpEASY:NoConvergence', ...
+            'Search stopped at c=%g without reaching convergence for bdp=%g.', c, bdp);
+        break
+    end
+    
+    % Erho1>1 means that E(\rho)/\rho(c) is greater than bdp: increase c
+    if Erho1>1
+        c=c+step;
+    else
+        c=max(c-step,0.1);
+    end
+    % Show the progress of the search
+    disp([step c Erho1])
+end
+end
diff --git a/ICORS2025summerSchool/TBbdpEASYmain.m b/ICORS2025summerSchool/TBbdpEASYmain.m
@@ -0,0 +1,3 @@
+% Find constant c associated to bdp=0.5 for TB
+% (TB = Tukey's biweight; bdp = breakdown point).
+% The semicolon suppresses the display of c; TBbdpEASY itself displays
+% vector [step c Erho1] at each iteration of the search.
+
+c=TBbdpEASY(0.5);
diff --git a/ICORS2025summerSchool/example_concentration_steps.m b/ICORS2025summerSchool/example_concentration_steps.m
diff --git a/ICORS2025summerSchool/example_location.m b/ICORS2025summerSchool/example_location.m

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+% Find constant c associated to bdp=0.5 for TB`
	`2`	`+`
	`3`	`+c=TBbdpEASY(0.5);`