Skip to content

Commit db58e14

Browse files
added ARdata
1 parent 3220a9f commit db58e14

File tree

1 file changed

+370
-0
lines changed

1 file changed

+370
-0
lines changed

ICORS2025summerSchool/ARdata.m

Lines changed: 370 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,370 @@
1+
2+
3+
%% MR: (Multiple regression data): yXplot
4+
close all;
5+
load('multiple_regression.txt');
6+
y=multiple_regression(:,4);
7+
X=multiple_regression(:,1:3);
8+
yXplot(y,X);
9+
10+
%% MR: (Multiple regression data): traditional fit
11+
close all;
12+
load('multiple_regression.txt');
13+
y=multiple_regression(:,4);
14+
X=multiple_regression(:,1:3);
15+
out=fitlm(X,y);
16+
disp(out)
17+
18+
%% MR: (Multiple regression data): traditional robust fit
19+
close all;
20+
load('multiple_regression.txt');
21+
y=multiple_regression(:,4);
22+
X=multiple_regression(:,1:3);
23+
out=fitlm(X,y,'RobustOpts','on');
24+
disp(out)
25+
26+
%% MR: (Multiple regression data): qqplot with envelopes
27+
load('multiple_regression.txt');
28+
y=multiple_regression(:,4);
29+
X=multiple_regression(:,1:3);
30+
outLM=fitlm(X,y,'exclude','');
31+
res=outLM.Residuals.Studentized; % studentized residuals (third variable of the Residuals table)
32+
qqplotFS(res,'X',X,'plots',1);
33+
title('qqplot of stud. res.')
34+
text(-2.4,-2.4,num2str(43),'Units','data');
35+
36+
%% Plot of residuals against fitted values
37+
close all
38+
plot(outLM.Fitted,res,'o')
39+
sel=43;
40+
text(outLM.Fitted(sel)+0.5,res(sel),num2str(sel))
41+
xlabel('Fitted values')
42+
ylabel('Residuals')
43+
44+
%% MR: (Multiple regression data): S estimators with 2 values of breakdown point
45+
conflev=[0.95 0.99];
46+
% Note that the pattern of residuals changes completely
47+
% Using bdp=0.5 the outliers are correctly found, on the other hand using
48+
% bdp=0.25 the masking effect is clear
49+
figure;
50+
h1=subplot(2,1,1);
51+
bdp=0.25;
52+
[out]=Sreg(y,X,'nsamp',3000,'bdp',bdp);
53+
resindexplot(out,'h',h1,'conflev',conflev);
54+
ylabel(['Breakdown point =' num2str(bdp)])
55+
h2=subplot(2,1,2);
56+
bdp=0.5;
57+
[out]=Sreg(y,X,'nsamp',3000,'bdp',bdp);
58+
resindexplot(out,'h',h2,'conflev',conflev);
59+
ylabel(['Breakdown point =' num2str(bdp)])
60+
cascade;
61+
62+
63+
%% Brushing from the resindexplot
64+
close all
65+
bdp=0.5;
66+
% two different confidence levels
67+
conflev=[0.95 0.99];
68+
69+
load('multiple_regression.txt');
70+
y=multiple_regression(:,4);
71+
X=multiple_regression(:,1:3);
72+
73+
[out]=Sreg(y,X,'nsamp',3000,'bdp',bdp,'yxsave',1);
74+
resindexplot(out,'conflev',conflev,'databrush',1);
75+
% ylabel(['Breakdown point =' num2str(bdp)])
76+
77+
78+
%% Back to slides monitoring of S estimators
79+
80+
%% Resfwdplot shown as a movie
81+
load('multiple_regression.txt');
82+
y=multiple_regression(:,4);
83+
X=multiple_regression(:,1:3);
84+
% LMS using 10000 subsamples
85+
[out]=LXS(y,X,'nsamp',10000);
86+
% Forward Search
87+
[out]=FSReda(y,X,out.bs);
88+
resfwdplot(out,'movieLength',5)
89+
90+
%% MR (Multiple regression data): Forward EDA datatooltip which monitors bsb
91+
load('multiple_regression.txt');
92+
y=multiple_regression(:,4);
93+
X=multiple_regression(:,1:3);
94+
% LMS using 10000 subsamples
95+
[out]=LXS(y,X,'nsamp',10000);
96+
% Forward Search
97+
[out]=FSReda(y,X,out.bs);
98+
out1=out;
99+
% Create scaled squared residuals
100+
% out1.RES=out.RES.^2;
101+
102+
datatooltip=struct;
103+
datatooltip.SubsetLinesColor=[1 0 0];
104+
resfwdplot(out1,'datatooltip',datatooltip)
105+
106+
107+
%% MR (Multiple regression data): Forward EDA using persistent brushing
108+
load('multiple_regression.txt');
109+
y=multiple_regression(:,4);
110+
X=multiple_regression(:,1:3);
111+
% LMS using 10000 subsamples
112+
[out]=LXS(y,X,'nsamp',10000);
113+
% Forward Search
114+
[out]=FSReda(y,X,out.bs);
115+
out1=out;
116+
% Create scaled squared residuals
117+
out1.RES=out.RES.^2;
118+
119+
% plot minimum deletion residual with personalized options
120+
% mdrplot(out,'ylimy',[1 4.2],'xlimx',[10 60],'FontSize',14,'SizeAxesNum',14,'lwdenv',2);
121+
122+
% Persistent brushing on the plot of the scaled residuals. The plot is:
123+
fground.flabstep=''; % without labels at steps 0 and n
124+
fground.fthresh=3.5^2; % threshold which defines the trajectories in foreground
125+
fground.LineWidth=1.5; % personalised linewidth for trajectories in foreground
126+
fground.Color={'r'}; % personalised color (red lines) for trajectories in foreground
127+
128+
databrush=struct;
129+
databrush.bivarfit='';
130+
databrush.selectionmode='Rect'; % Rectangular selection
131+
databrush.persist='on'; % Enable repeated mouse selections
132+
databrush.Label='on'; % Write labels of trajectories while selecting
133+
databrush.RemoveLabels='off'; % Do not remove labels after selection
134+
databrush.Pointer='hand'; % Hand cursor point while selecting
135+
databrush.FlagSize='8'; % Size of the brushed points
136+
databrush.RemoveTool='on'; % Remove yellow selection after finishing brushing
137+
resfwdplot(out1,'fground',fground,'databrush',databrush);
138+
139+
%% Rotate manually
140+
scatter3(X(:,1),X(:,2),y)
141+
xlabel('X1')
142+
ylabel('X2')
143+
zlabel('y')
144+
hold('on');
145+
sel=[9 30 31 38 47 21];
146+
scatter3(X(sel,1),X(sel,2),y(sel),'r')
147+
% sel=[43];
148+
hold('on')
149+
sel1=43;
150+
scatter3(X(sel1,1),X(sel1,2),y(sel1),'k','MarkerFaceColor','k')
151+
text(X(sel1,1),X(sel1,2),y(sel1),'43')
152+
153+
154+
%% MR: Forward EDA persistent brushing with lasso selection.
155+
close all;
156+
load('multiple_regression.txt');
157+
y=multiple_regression(:,4);
158+
X=multiple_regression(:,1:3);
159+
% LMS using 10000 subsamples
160+
[out]=LXS(y,X,'nsamp',10000);
161+
% Forward Search
162+
[out]=FSReda(y,X,out.bs);
163+
out1=out;
164+
% Create scaled squared residuals
165+
out1.RES=out.RES.^2;
166+
167+
fground.flabstep=[15 20];
168+
databrush=struct;
169+
databrush.bivarfit='';
170+
databrush.selectionmode='Lasso'; % Lasso selection
171+
databrush.persist='on'; % Enable repeated mouse selections
172+
databrush.Label='on'; % Write labels of trajectories while selecting
173+
databrush.RemoveLabels='off'; % Do not remove labels after selection
174+
resfwdplot(out1,'fground',fground,'databrush',databrush);
175+
176+
%% MR: Traditional Variable selection (all units)
177+
close all;
178+
load('multiple_regression.txt');
179+
y=multiple_regression(:,4);
180+
X=multiple_regression(:,1:3);
181+
out=fitlm(X,y)
182+
183+
%% MR: Traditional Variable selection without unit 43
184+
fitlm(X,y,'Exclude',43)
185+
186+
%% MR: Forward EDA rescaled t stat monitoring
187+
close all;
188+
load('multiple_regression.txt');
189+
y=multiple_regression(:,4);
190+
X=multiple_regression(:,1:3);
191+
% LMS using 10000 subsamples
192+
[out]=LXS(y,X,'nsamp',10000);
193+
% Forward Search
194+
[out]=FSReda(y,X,out.bs);
195+
hold('on');
196+
plot(out.Tols(:,1),out.Tols(:,3:end),'LineWidth',3)
197+
for j=3:5
198+
tj=['t_' num2str(j-2)];
199+
text(out.Tols(1,1)-1.2,out.Tols(1,j),tj,'FontSize',16)
200+
201+
end
202+
203+
quant=norminv(0.95);
204+
v=axis;
205+
lwdenv=2;
206+
line([v(1),v(2)],[quant,quant],'color','g','LineWidth',lwdenv);
207+
line([v(1),v(2)],[-quant,-quant],'color','g','LineWidth',lwdenv);
208+
% plot(out.Tols(end-6:end-1,1),out.Tols(end-6:end-1,3),'LineWidth',4,'color','r')
209+
title('Monitoring of t-stat','FontSize',14);
210+
xlabel('Subset size m');
211+
212+
213+
%% MR: monitoring of t-stat with zoom for first variable
214+
figure;
215+
hold('on');
216+
plot(out.Tols(:,1),out.Tols(:,3:end))
217+
ylim([-3 5]);
218+
quant=norminv(0.95);
219+
v=axis;
220+
lwdenv=2;
221+
line([v(1),v(2)],[quant,quant],'color','g','LineWidth',lwdenv);
222+
line([v(1),v(2)],[-quant,-quant],'color','g','LineWidth',lwdenv);
223+
plot(out.Tols(end-6:end-1,1),out.Tols(end-6:end-1,3),'LineWidth',4,'color','r')
224+
title('Monitoring of t-stat for first variable');
225+
xlabel('Subset size m');
226+
plot(out.Tols(end-7:end-6,1),out.Tols(end-7:end-6,3),'LineWidth',4,'color','b')
227+
plot(out.Tols(end-1:end,1),out.Tols(end-1:end,3),'LineWidth',4,'color','b')
228+
text(out.Tols(end-7,1),out.Tols(end-7,3)+0.7,'43','FontSize',16);
229+
text(out.Tols(end-1,1),out.Tols(end-1,3)+0.7,'43','FontSize',16);
230+
%annotation(gcf,'textarrow',[0.54 0.68],...
231+
% [0.28 0.44],'TextEdgeColor','none');
232+
text(53,1,'9, 21, 30, 31, 38, 47','FontSize',16,'Rotation',-45);
233+
234+
%% Successful applications: see slides
235+
236+
%% Bank data see slides
237+
238+
%% Introduction to transformations
239+
240+
%% WD: Score test traditional analysis
241+
% Log transformation is strongly suggested
242+
clearvars;close all;
243+
load('wool.txt','wool');
244+
y=wool(:,4);
245+
X=wool(:,1:3);
246+
out=Score(y,X);
247+
lam="lambda="+(-1:0.5:1)';
248+
disp(array2table(out.Score,'RowNames',lam,"VariableNames","Score test"));
249+
250+
%% WD: fan plot
251+
% Log transformation is strongly suggested
252+
clearvars;close all;
253+
load('wool.txt','wool');
254+
y=wool(:,4);
255+
X=wool(:,1:3);
256+
[outfan]=FSRfan(y,X,'plots',1,'init',7);
257+
258+
259+
%% LD (Loyalty cards data): yXplot
260+
clearvars;close all;
261+
load('loyalty.txt');
262+
y=loyalty(:,4); %#ok<SUSENS>
263+
X=loyalty(:,1:3);
264+
namey='Sales';
265+
nameX={'Number of visits', 'Age', 'Number of persons in the family'};
266+
% yXplot
267+
yXplot(y,X,'nameX',nameX,'namey',namey);
268+
269+
%% LD fan plot
270+
clearvars;close all;
271+
load('loyalty.txt');
272+
y=loyalty(:,4); %#ok<SUSENS>
273+
X=loyalty(:,1:3);
274+
% Compute fan plot to find best value of transformation parameter
275+
[out]=FSRfan(y,X,'plots',1,'la',[-1 -0.5 0 1/4 1/3 0.4 0.5 1]);
276+
277+
278+
%% LD: dynamic brushing from the fan plot
279+
% Interactive_example
280+
clearvars;close all;
281+
load('loyalty.txt');
282+
y=loyalty(:,4);
283+
X=loyalty(:,1:3);
284+
namey='Sales';
285+
nameX={'Number of visits', 'Age', 'Number of persons in the family'};
286+
287+
% Compute fan plot to find best value of transformation parameter
288+
[out]=FSRfan(y,X,'plots',1,'la',[-1 -0.5 0 1/4 1/3 0.4 0.5 1]);
289+
% FlagSize controls how large the highlighted points must be. It is a
290+
%parameter of selectdataFS.
291+
fanplot(out,'xlimx',[10 520],'lwd',1.5,'FontSize',11,'SizeAxesNum',11,'nameX',nameX,'namey',namey,'databrush',{'selectionmode' 'Brush'...
292+
'multivarfit' '2' 'FlagSize' '5'})
293+
% If you wish to do persistent brushing from the fan plot
294+
% uncomment the following line. Notice that multiple trajectories can be selected
295+
% fanplot(out,'databrush',{'selectionmode' 'Rect' 'persist' 'on' 'selectionmode','Brush'})
296+
297+
298+
299+
%% LD: Automatic outlier detection procedure on transformed data
300+
clearvars;close all;
301+
load('loyalty.txt');
302+
y=loyalty(:,4);
303+
X=loyalty(:,1:3);
304+
y1=y.^(0.4);
305+
nameX={'Number of visits', 'Age', 'Number of persons in the family'};
306+
307+
namey1='Sales^{0.4}';
308+
[outFS]=FSR(y1,X,'namey',namey1,'nameX',nameX);
309+
310+
311+
%% LD: Automatic transformation
312+
close all
313+
load('loyalty.txt');
314+
y=loyalty(:,4); %#ok<SUSENS>
315+
X=loyalty(:,1:3);
316+
n=length(y);
317+
outFSRfan=FSRfan(y,X,'plots',1,'init',round(n*0.3),'nsamp',10000,'la',-1:0.1:1,'msg',0); % fan plot over a grid of lambda values from -1 to 1 in steps of 0.1
318+
[out]=fanBIC(outFSRfan);
319+
320+
321+
322+
%% LD: Interactive monitoring of the trajectories of scaled residuals
323+
% Interactive_example
324+
% using persistent brushing
325+
clearvars;close all;
326+
load('loyalty.txt');
327+
y=loyalty(:,4);
328+
X=loyalty(:,1:3);
329+
330+
y1=y.^(0.4);
331+
[out]=LXS(y1,X,'nsamp',10000);
332+
[out]=FSReda(y1,X,out.bs);
333+
334+
databrush=struct;
335+
databrush.bivarfit='2';
336+
databrush.selectionmode='Rect'; % Rectangular selection
337+
databrush.persist='on'; % Enable repeated mouse selections
338+
databrush.Label='off'; % Do not write labels of trajectories while selecting
339+
databrush.RemoveLabels='on'; % Remove labels after selection
340+
resfwdplot(out,'databrush',databrush);
341+
342+
%% Examples of Extended Yeo Johnson transformation: back to slides
343+
344+
%% Examples of fraud detection: back to slides
345+
346+
347+
%% FP (Fishery product): preliminary analysis
348+
clearvars;close all;
349+
load('fishery.mat');
350+
y=fishery{:,2};
351+
X=fishery{:,1};
352+
% Plot of the original data
353+
plot(X,y,'*');
354+
xlabel('Quantity (Tons)');
355+
ylabel('Values (Thousands of Euros)');
356+
357+
%% FP: Dynamic brushing from the fan plot without persistent option
358+
% Interactive_example
359+
clearvars;close all;
360+
% Multiple trajectories can be selected
361+
load('fishery.mat');
362+
y=fishery{:,2};
363+
X=fishery{:,1};
364+
365+
[out]=FSRfan(y,X,'plots',1,'la',[0 0.5 1]);
366+
fanplot(out,'ylimy',[-40,20],'databrush',{'selectionmode' 'Rect' 'persist' '' 'selectionmode','Brush'},'conflev',1-0.001/length(y)) % NOTE(review): 'selectionmode' is passed twice ('Rect', then 'Brush') - presumably only one takes effect; confirm which and drop the other
367+
368+
369+
370+

0 commit comments

Comments
 (0)