Skip to content

Commit 128a349

Browse files
committed
version bump 0.8.3
also force struct version between 0.4.1 and 0.5.0
1 parent 393c982 commit 128a349

9 files changed

+180
-20
lines changed

DESCRIPTION

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: structToolbox
22
Type: Package
33
Title: Some tools bult using the struct package
4-
Version: 0.8.2
4+
Version: 0.8.3
55
Author: Gavin Rhys Lloyd
66
Maintainer: Gavin Rhys Lloyd <[email protected]>
77
Description: Extends the class templates provided by the struct package to provide methods for training PCA, PLS models with cross-validation, permutation testing etc.
@@ -44,6 +44,7 @@ Collate:
4444
'glog_class.R'
4545
'grid_search_1d_class.R'
4646
'hca_class.R'
47+
'kfold_xval2_class.R'
4748
'kfold_xval_class.R'
4849
'kfold_xval_charts.R'
4950
'knn_impute_class.R'
@@ -75,7 +76,7 @@ Collate:
7576
'vec_norm_class.R'
7677
'wilcox_test_class.R'
7778
'zzz.R'
78-
Depends: struct (== 0.4.1)
79+
Depends: struct (>= 0.4.1), struct(< 0.5.0)
7980
Imports: ggplot2,
8081
pmp,
8182
gridExtra,
@@ -103,7 +104,7 @@ Suggests:
103104
sbcms,
104105
Rtsne
105106
Remotes: computational-metabolomics/pmp,
106-
computational-metabolomics/struct@v0.4.1,
107+
computational-metabolomics/struct,
107108
computational-metabolomics/sbcms
108109
VignetteBuilder: knitr
109110
biocViews: WorkflowStep

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ export(grid_search_1d)
4242
export(gs_line)
4343
export(hca_dendrogram)
4444
export(kfold_xval)
45+
export(kfold_xval2)
4546
export(kfoldxcv_grid)
4647
export(kfoldxcv_metric)
4748
export(knn_impute)

R/PCA_class.R

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ PCA<-setClass(
1919
outputs.eigenvalues='data.frame',
2020
outputs.ssx='numeric',
2121
outputs.correlation='data.frame',
22-
outputs.that='dataset'
22+
outputs.that='dataset',
23+
outputs.xhat='dataset'
2324
),
2425
prototype = list(name='Principal Component Analysis (PCA)',
2526
description='PCA is a multivariate data reduction technique. It summarises the data in a smaller number of Principal Components that describe the maximum variation present in the dataset.',
@@ -107,6 +108,12 @@ setMethod(f="model.predict",
107108
dataset.data(S)=that
108109
output.value(M,'that')=S
109110

111+
xhat=as.matrix(that)%*%as.matrix(t(P))
112+
xhat=as.data.frame(xhat)
113+
rownames(that)=rownames(X)
114+
colnames(xhat)=colnames(X)
115+
M$xhat=dataset(data=xhat,sample_meta=D$sample_meta,variable_meta=D$variable_meta)
116+
110117
return(M)
111118
}
112119
)

R/forward_selection_by_rank_class.R

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,6 @@ eval_loess=function(x,X,Y,k=10,p=0.66)
167167
# Y = observed values
168168
# k = number of replicates
169169
# p = proportion in training
170-
171170
residual=numeric(k)
172171
for (i in 1:k)
173172
{
@@ -181,9 +180,16 @@ eval_loess=function(x,X,Y,k=10,p=0.66)
181180
yy2=Y[X %in% xx2]
182181

183182

184-
loessMod <- loess(yy ~ xx, span=x) # 25% smoothing span
185-
smoothed=stats::predict(loessMod,newdata=xx2)
186-
residual[i]=sum((smoothed-yy2)^2)
183+
loessMod <- loess(yy ~ xx, span=x)
184+
185+
# check for NaN
186+
if (any(is.nan(loessMod$fitted))){
187+
residual[i]=99999
188+
} else {
189+
190+
smoothed=stats::predict(loessMod,newdata=xx2)
191+
residual[i]=sum((smoothed-yy2)^2)
192+
}
187193
}
188194
return(sqrt(mean(residual)))
189195
}

R/kfold_xval2_class.R

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
#' kfold_xval model class
2+
#'
3+
#' Applies k-fold crossvalidation to a model or model.seq()
4+
#' @export kfold_xval2
5+
#' @examples
6+
#' I = kfold_xval2()
7+
kfold_xval2<-setClass(
8+
"kfold_xval2",
9+
contains='resampler',
10+
slots=c(params.folds='numeric',
11+
params.method='character',
12+
params.factor_name='entity',
13+
outputs.metric='data.frame'
14+
),
15+
prototype = list(name='k-fold cross-validation',
16+
type="resampling",
17+
result='metric',
18+
params.folds=10,
19+
params.method='venetian'
20+
)
21+
)
22+
23+
#' @export
24+
#' @template run
25+
setMethod(f="run",
26+
signature=c("kfold_xval2",'dataset','metric'),
27+
definition=function(I,D,MET=NULL)
28+
{
29+
X=dataset.data(D)
30+
31+
32+
WF=models(I)
33+
34+
# venetian 123123123123
35+
if (param.value(I,'method')=='venetian')
36+
{
37+
fold_id=rep(1:param.value(I,'folds'),length.out=nrow(X))
38+
} else if (param.value(I,'method')=='blocks')
39+
{ # blocks 111122223333
40+
fold_id=rep(1:param.value(I,'folds'),length.out=nrow(X))
41+
fold_id=sort(fold_id)
42+
} else if (param.value(I,'method')=='random') {
43+
fold_id=rep(1:param.value(I,'folds'),length.out=nrow(X))
44+
fold_id=sample(fold_id,length(fold_id),replace = FALSE)
45+
} else {
46+
stop('unknown method for cross-validation. (try "venetian", "blocks" or "random")')
47+
}
48+
49+
# for each value of k, split the data and run the workflow
50+
for (i in 1:param.value(I,'folds'))
51+
{
52+
# prep the training data
53+
TrainX=X[fold_id!=i,,drop=FALSE]
54+
TrainY=Y[fold_id!=i,,drop=FALSE]
55+
dtrain=dataset(data=TrainX,sample_meta=TrainY)
56+
57+
TestX=X[fold_id==i,,drop=FALSE]
58+
TestY=Y[fold_id==i,,drop=FALSE]
59+
dtest=dataset(data=TestX,sample_meta=TestY)
60+
61+
if (is(WF,'model_OR_model.seq'))
62+
# HAS TO BE A model OR model.seq
63+
{
64+
WF=model.train(WF,dtrain)
65+
# apply the model
66+
WF=model.predict(WF,dtrain)
67+
p=predicted(WF)
68+
# metric
69+
if (MET@actual=='sample_meta') {
70+
yhat=p
71+
} else if (MET@actual=='data') {
72+
yhat=p$data
73+
} else {
74+
stop('MET$actual not implemented yet')
75+
}
76+
YHATtr[fold_id!=i,]=yhat
77+
78+
# test set
79+
WF=model.predict(WF,dtest)
80+
p=predicted(WF)
81+
82+
if (MET@actual=='sample_meta') {
83+
yhat=p
84+
} else if (MET@actual=='data') {
85+
yhat=p$data
86+
} else {
87+
stop('MET$actual not implemented yet')
88+
}
89+
YHAT[fold_id==i,]=yhat
90+
91+
92+
} else if (is(WF,'iterator'))
93+
{
94+
stop('not implemented yet')
95+
}
96+
# validation set...??
97+
# WF=predict(WF,dval)
98+
# p=predicted(WF[length(WF)])
99+
# val_result[,1]=p[,1]
100+
101+
#all_results[((nrow(X)*(i-1))+1):(nrow(X)*i),]=fold_results
102+
}
103+
104+
if (MET@actual=='data') {
105+
# if its a model sequence get the prediction from the penultimate step
106+
# for comparison with the predictions
107+
if (is(WF,'model_OR_model.seq')) {
108+
# apply model to data
109+
WF=model.apply(WF,D)
110+
n=length(WF)
111+
if (n>1) {# just in case a sequence of 1
112+
Y=predicted(WF[n-1])$data
113+
}
114+
}
115+
}
116+
117+
# test sets metric
118+
df=data.frame('training_set'=0,'test_set'=0,'metric'=class(MET)[[1]])
119+
MET=calculate(MET,Y,YHAT)
120+
df$training_set=value(MET)
121+
# training set metric
122+
MET=calculate(MET,Y,YHATtr)
123+
df$test_set=value(MET)
124+
I$metric=df
125+
return(I)
126+
}
127+
)
128+

R/permutation_test2_class.R

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@ permutation_test2<-setClass(
1111
slots=c(
1212
params.number_of_permutations='numeric',
1313
params.collect='character',
14-
outputs.results.permuted='data.frame',
15-
outputs.results.unpermuted='data.frame',
16-
outputs.metric='data.frame',
14+
outputs.metric_permuted='data.frame',
15+
outputs.metric_unpermuted='data.frame',
1716
outputs.collected='entity'
1817
),
1918
prototype = list(name='permutation test',
@@ -40,9 +39,6 @@ setMethod(f="run",
4039
WF=models(I)
4140
n=param.value(I,'number_of_permutations')
4241

43-
all_results_permuted=data.frame('actual'=rep(y[,1],n),'predicted'=rep(y[,1],n),'permutation'=0)
44-
all_results_unpermuted=data.frame('actual'=rep(y[,1],n),'predicted'=rep(y[,1],n),'permutation'=0)
45-
4642
collected=list(permuted=list(),unpermuted=list())
4743

4844
for (i in 1:n)
@@ -142,8 +138,8 @@ setMethod(f="run",
142138

143139
}
144140
# store results
145-
output.value(I,'results.permuted')=all_results_permuted
146-
output.value(I,'results.unpermuted')=all_results_unpermuted
141+
output.value(I,'metric_permuted')=all_results_permuted
142+
output.value(I,'metric_unpermuted')=all_results_unpermuted
147143
return(I)
148144
}
149145
)

R/r_squared_class.R

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,14 @@ setMethod(f="calculate",
1919
signature=c('r_squared'),
2020
definition=function(obj,Y,Yhat)
2121
{
22-
SSR = sum((Yhat-mean(Y))^2)
23-
SSE = sum((Y-Yhat)^2)
24-
SSTO = sum((Y-mean(Y))^2)
22+
23+
M=matrix(colMeans(Y),nrow=1)
24+
O=matrix(1,nrow=nrow(Y),ncol=1)
25+
M=O %*% M
26+
27+
SSR = sum(sum((Yhat-M)^2))
28+
SSE = sum(sum((Y-Yhat)^2))
29+
SSTO = sum(sum((Y-M)^2))
2530

2631
R2=1-(SSE/SSTO)
2732

man/kfold_xval2-class.Rd

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/run.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)