1+ library(corrplot )
2+
3+ message(" start PCA" )
4+ # f_table= f_table_ori
5+
6+ f_table_new = f_table
7+ f_table2 = f_table_new
8+ # colnames(f_table_new)
9+ #
10+ # #firstpaperorder
11+ # colnames(f_table2)=c ("AA_id","AB_group","H_speeds","N_distance_traveled","D_turning_angle","E_meander",
12+ # "O_activitytime_ST",
13+ # "K_act_bouts_ST","C_pause_length_ST","I_#pauses_ST","P_activitytime_timeT",
14+ # "L_act_bouts_DtimeT","B_pause_length_timeT","J_#pauses_timeT", "F_thigmotaxis_moving", "G_thigmotasix_pause","M_#walks","AC_stripe_deviation")
15+
16+ if (MINCOL == 2 ){
17+ colnames(f_table2 )= c(" AA_id" ," AB_group" ," H_speeds" ," N_distance_traveled" ," D_turning_angle" ," E_meander" ," XA_activitytime_ST" ," XB_act_bouts_ST" ," XC_pause_length_ST" ," XD_#pauses_ST" ," P_activitytime_timeT" ," L_act_bouts_DtimeT" ," B_pause_length_timeT" ," J_#pauses_timeT" ," F_thigmotaxis_moving" , " G_thigmotasix_pause" ," M_#walks" ," AC_stripe_deviation" )
18+ }
19+
20+ if (MINCOL == 8 ){
21+ colnames(f_table2 )= c(" AA_id" ," AB_group" ," ZA_genotype" , " ZB_treatment" , " ZC_machine" ," ZD_other" , " ZE_date" ," ZF_timeofday" ," H_speeds" ," N_distance_traveled" ," D_turning_angle" ," E_meander" ," XA_activitytime_ST" ," XB_act_bouts_ST" ," XC_pause_length_ST" ," XD_#pauses_ST" ," P_activitytime_timeT" ," L_act_bouts_DtimeT" ," B_pause_length_timeT" ," J_#pauses_timeT" ," F_thigmotaxis_moving" , " G_thigmotasix_pause" ," M_#walks" ," AC_stripe_deviation" )
22+ }
23+ # ##add this to make the pca over the genotype group only
24+
25+ # if (MINCOL==8){
26+ # colnames(f_table2)= c("AA_id","ZA_group2","AB_group", "ZB_treatment", "ZC_machine","ZD_other", "ZE_date","ZF_timeofday","H_speeds","N_distance_traveled","D_turning_angle","E_meander","XA_activitytime_ST","XB_act_bouts_ST","XC_pause_length_ST","XD_#pauses_ST","P_activitytime_timeT","L_act_bouts_DtimeT","B_pause_length_timeT","J_#pauses_timeT","F_thigmotaxis_moving", "G_thigmotasix_pause","M_#walks","AC_stripe_deviation")
27+ # }
28+
29+ data.frame (names(f_table ),names(f_table2 ))
30+ # # get linearity score out
31+ # f_table2=f_table2[,-16]
32+
33+ # # reorder variables following the free walk analysis
34+ f_table2 = f_table2 [,order(colnames(f_table2 ))]
35+
36+ # ## gives output if not all calculation are made
37+ # if(length(f_table_new) != 19) f_table2=f_table_new
38+
39+
40+ # ### ENTER HERE MANUALLY A NEW DATA AS ENTRY FOR THE PCA ANALYSIS
41+ # f_table2= f_table
42+
43+
44+ i_table = na.omit(f_table2 )
45+ h_table <- f_table2 [,c(3 : (length(f_table2 )- MINCOL + 2 - 4 ))] # +2 from group and id, -4 to take ST out
46+ rownames (h_table ) = paste (c(1 : length(f_table2 [,1 ])),f_table2 [,2 ])
47+ # h_table<-f_table2[,c(3:13,16:length(f_table2))]
48+ g_table <- na.omit(h_table )
49+ head(g_table )
50+ mydata.pca <- prcomp(g_table , retx = TRUE , center = TRUE , scale. = TRUE )
51+
52+
53+
54+ sd <- mydata.pca $ sdev
55+ loadings <- mydata.pca $ rotation
56+ rownames(loadings ) <- colnames(g_table )
57+ scores <- mydata.pca $ x
58+
59+
60+ t = mydata.pca $ sdev ^ 2 / sum(mydata.pca $ sdev ^ 2 )
61+ t2 = cumsum(t )
62+ plot(t2 * 10 , main = " variance explained cumulative" )
63+
64+
65+ PCA_res = data.frame (scores )
66+ PCA_res $ group = as.factor(id_table $ genotype )
67+
68+ # PCA_res$group = i_table[,19]
69+
70+ # Plot PC1 and 2 info on one graph
71+ PCA1to3 = data.frame (PCA_res $ PC1 ,PCA_res $ PC2 ,PCA_res $ PC3 ,PCA_res $ group )
72+
73+ Mean_PCA = create.mean.table(PCA1to3 ,levels(PCA1to3 $ PCA_res.group ),1 : 2 )
74+ Mean_PCA2 = create.mean.table(PCA1to3 ,levels(PCA1to3 $ PCA_res.group ),c(1 ,3 ))
75+ # ################################################
76+ # ##################################################
77+ # #################################################
78+ setwd(outputpath )
79+ scalingfact = 7
80+ scalingaxis = 1
81+
82+
83+ # ##prepare mean tables
84+ PCA1to3 = data.frame (PCA_res $ PC1 ,PCA_res $ PC2 ,PCA_res $ PC3 ,PCA_res $ group )
85+ Mean_PCA_3d = create.mean.table(PCA1to3 ,levels(PCA1to3 $ PCA_res.group ),1 : 3 )
86+ Mean_PCA = create.mean.table(PCA1to3 ,levels(PCA1to3 $ PCA_res.group ),1 : 2 )
87+
88+
89+ abc = c(1 : 8 , rgb(202 ,100 ,20 ,maxColorValue = 255 ),rgb(100 ,0 ,200 ,maxColorValue = 255 ))
90+
91+ plot(scores [,1 ], scores [,2 ], xlab = " PCA 1" , ylab = " PCA 2" ,
92+ type = " n" , main = " distance biplot" ,xlim = c(- max(scores [,1 : 2 ]* scalingaxis ), max(scores [,1 : 2 ]* scalingaxis )),
93+ ylim = c(- max(scores [,1 : 2 ]* scalingaxis ), max(scores [,1 : 2 ]* scalingaxis )))
94+
95+
96+ for (i in 1 : length(levels(PCA_res $ group ))){
97+ X = subset(PCA_res ,PCA_res $ group == levels(PCA_res $ group )[i ])
98+ points(X $ PC2 ~ X $ PC1 , col = abc [i ])
99+ # legend(list(x=-6,y=-i/2+5), legend=(levels(PCA_res$group)[i]), fill= i, bty="n")
100+ # text(x+1,y-0.1, levels(PCA_res$group)[i], col=i, cex=0.7)
101+ legend(" topleft" , legend = (levels(PCA_res $ group )[i ]), fill = abc [i ], bty = " n" , inset = c(0 ,i / 30 ))
102+
103+ x = Mean_PCA $ means $ PCA_res.PC1 [i ]
104+ y = Mean_PCA $ means $ PCA_res.PC2 [i ]
105+ x2 = Mean_PCA $ ses $ PCA_res.PC1 [i ]
106+ y2 = Mean_PCA $ ses $ PCA_res.PC2 [i ]
107+ segments (x - x2 ,y ,x + x2 ,y , col = abc [i ])
108+ segments (x ,y - y2 ,x ,y + y2 , col = abc [i ])
109+ }
110+ abline (v = 0 , h = 0 )
111+
112+ plot(scores [,1 ], scores [,3 ], xlab = " PCA 1" , ylab = " PCA 3" ,
113+ type = " n" , main = " distance biplot" ,xlim = c(- max(scores [,1 : 3 ]* scalingaxis ), max(scores [,1 : 3 ]* scalingaxis )),
114+ ylim = c(- max(scores [,1 : 3 ]* scalingaxis ), max(scores [,1 : 3 ]* scalingaxis )))
115+
116+
117+ for (i in 1 : length(levels(PCA_res $ group ))){
118+ X = subset(PCA_res ,PCA_res $ group == levels(PCA_res $ group )[i ])
119+ points(X $ PC3 ~ X $ PC1 , col = abc [i ])
120+ # legend(list(x=-6,y=-i/2+5), legend=(levels(PCA_res$group)[i]), fill= i, bty="n")
121+ # text(x+1,y-0.1, levels(PCA_res$group)[i], col=i, cex=0.7)
122+ legend(" topleft" , legend = (levels(PCA_res $ group )[i ]), fill = abc [i ], bty = " n" , inset = c(0 ,i / 30 ))
123+
124+ x = Mean_PCA2 $ means $ PCA_res.PC1 [i ]
125+ y = Mean_PCA2 $ means $ PCA_res.PC3 [i ]
126+ x2 = Mean_PCA2 $ ses $ PCA_res.PC1 [i ]
127+ y2 = Mean_PCA2 $ ses $ PCA_res.PC3 [i ]
128+ segments (x - x2 ,y ,x + x2 ,y , col = abc [i ])
129+ segments (x ,y - y2 ,x ,y + y2 , col = abc [i ])
130+ }
131+ abline (v = 0 , h = 0 )
132+
133+
134+
135+ # #########################################
136+ # # plot data pc1 and 2 / 1 and 3 without points
137+ M = max(Mean_PCA $ means )+ max(Mean_PCA $ ses )* 1.1
138+ plot(scores [,1 ], scores [,2 ], xlab = " PCA 1" , ylab = " PCA 2" ,
139+ type = " n" , main = " distance biplot" ,xlim = c(- M , M ),
140+ ylim = c(- M , M ))
141+
142+ # plot(PCA_res$PC2~PCA_res$PC1, type= "n", add=TRUE)
143+ for (i in 1 : length(levels(PCA_res $ group ))){
144+ X = subset(PCA_res ,PCA_res $ group == levels(PCA_res $ group )[i ])
145+ # points(X$PC2~ X$PC1, col=i, pch= i+10)
146+ legend(" topleft" , legend = (levels(PCA_res $ group )[i ]), fill = abc [i ], bty = " n" , inset = c(0 ,i / 30 ))
147+ # text(x+1,y-0.1, levels(PCA_res$group)[i], col=i,
148+ # cex=0.7)
149+ x = Mean_PCA $ means $ PCA_res.PC1 [i ]
150+ y = Mean_PCA $ means $ PCA_res.PC2 [i ]
151+ x2 = Mean_PCA $ ses $ PCA_res.PC1 [i ]
152+ y2 = Mean_PCA $ ses $ PCA_res.PC2 [i ]
153+ segments (x - x2 ,y ,x + x2 ,y , col = abc [i ])
154+ segments (x ,y - y2 ,x ,y + y2 , col = abc [i ])
155+ }
156+ abline (v = 0 , h = 0 )
157+
158+
159+
160+
161+ # ######
162+ # #####################################################
163+ # plot all
164+ require (rgl )
165+ load(" /Users/choupi/Desktop/pca3d.rdata" )
166+
167+
168+ setwd(outputpath )
169+ pdf(" samplesize_color_forpca.pdf" )
170+ par(mai = c(1.5 ,1.5 ,1.5 ,1.5 ))
171+ plot(PCA_res $ group , type = " n" , main = " samplesize" , ylim = c(0 ,30 ))
172+ for (i in 1 : length(levels(PCA_res $ group ))){
173+
174+ plot(PCA_res $ group [PCA_res $ group == levels(PCA_res $ group )[i ]], col = abc [i ], add = TRUE )
175+ }
176+
177+ dev.off()
178+ par(mai = c(1 ,1 ,1 ,1 ))
179+
180+ LEV = 0.4
181+ scalingfact = 1
182+
183+ plot3d(c(0 ,PCA_res $ PC1 )* scalingfact ,c(0 ,PCA_res $ PC3 )* scalingfact ,c(0 ,PCA_res $ PC2 )* scalingfact ,
184+ type = " n" ,box = FALSE ,axes = TRUE ,xlab = " PC1" ,ylab = " PC3" ,zlab = " PC2" )
185+
186+ for (i in 1 : length(levels(PCA_res $ group ))){
187+ # add points:
188+ # X = subset(PCA_res,PCA_res$group == levels(PCA_res$group)[i])
189+ points = data.frame (X $ PC1 , X $ PC3 ,X $ PC2 )
190+ # plot3d(points, col=i,type= "p",add=TRUE)
191+
192+
193+
194+
195+ x = Mean_PCA_3d $ means $ PCA_res.PC1 [i ]
196+ y = Mean_PCA_3d $ means $ PCA_res.PC3 [i ]
197+ z = Mean_PCA_3d $ means $ PCA_res.PC2 [i ]
198+ x2 = Mean_PCA_3d $ ses $ PCA_res.PC1 [i ]
199+ y2 = Mean_PCA_3d $ ses $ PCA_res.PC3 [i ]
200+ z2 = Mean_PCA_3d $ ses $ PCA_res.PC3 [i ]
201+ A = matrix (c(x - x2 ,y ,z ,x + x2 ,y ,z ,x ,y - y2 ,z ,x ,y + y2 ,z ,x ,y , z - z2 ,x ,y ,z + z2 ),6 ,3 , byrow = TRUE )
202+ segments3d(A , col = abc [i ])
203+ # textplot (levels(PCA_res$group)[i],col=i, add=TRUE)
204+
205+ plot3d(ellipse3d(cov(points ), centre = c(x ,y ,z ), level = LEV ), col = i , alpha = 0.3 , add = TRUE )
206+ }
207+
208+ # PCA_res$group= as.factor(id_table$genotype)
0 commit comments