|
#' Correlation coefficient model class
#'
#' Model object describing a correlation analysis between features and
#' continuous sample_meta variables. The object holds the input parameters
#' (significance threshold, multiple test correction method, factor names and
#' the type of correlation) and the outputs (correlation coefficients,
#' p-values and significance flags).
#'
#' @import struct
#' @import stats
#' @export corr_coef
corr_coef <- setClass(
  "corr_coef",
  contains = c("method"),
  slots = c(
    # INPUTS
    params.alpha = "entity.stato",
    params.mtc = "entity.stato",
    params.factor_names = "entity",
    params.method = "enum",
    # OUTPUTS
    outputs.coeff = "entity",
    outputs.p_value = "entity",
    outputs.significant = "entity"
  ),
  prototype = list(
    name = "Correlation coefficient",
    description = "Calculates the correlation coefficient between features and continuous factors.",
    type = "univariate",
    predicted = "p_value",

    # names of the sample_meta columns to correlate each feature against
    params.factor_names = entity(
      name = "Factor names",
      type = "character",
      description = "Names of sample_meta columns to use"
    ),

    # p-value cutoff; defaults to 0.05
    params.alpha = entity.stato(
      name = "Confidence level",
      stato.id = "STATO:0000053",
      value = 0.05,
      type = "numeric",
      description = "the p-value cutoff for determining significance."
    ),

    # multiple test correction method; defaults to "fdr"
    params.mtc = entity.stato(
      name = "Multiple Test Correction method",
      stato.id = "OBI:0200089",
      value = "fdr",
      type = "character",
      description = "The method used to adjust for multiple comparisons."
    ),

    # type of correlation; restricted to the values listed in `list`
    params.method = enum(
      name = "Type of correlation",
      value = "spearman",
      type = "character",
      description = '"kendall", "pearson" or "spearman" correlation coefficient. Default="spearman".',
      list = c("kendall", "pearson", "spearman")
    ),

    # The two descriptions below previously described a t-statistic (copied
    # from a t-test model); they now describe what a correlation test yields.
    outputs.coeff = entity(
      name = "Correlation coefficient",
      type = "data.frame",
      description = "the value of the correlation coefficient calculated between each feature and the continuous factor."
    ),
    outputs.p_value = entity.stato(
      name = "p value",
      stato.id = "STATO:0000175",
      type = "data.frame",
      description = "the probability of observing a correlation coefficient at least as extreme as the calculated value if the true correlation is zero."
    ),

    outputs.significant = entity(
      name = "Significant features",
      #stato.id='STATO:0000069',
      type = "data.frame",
      description = "TRUE if the calculated p-value is less than the supplied threshold (alpha)"
    )
  )
)
| 68 | + |
#' Apply a correlation coefficient model to a dataset
#'
#' Runs \code{cor.test} between every column of \code{D$data} and every
#' \code{sample_meta} column named in \code{M$factor_names}, using the
#' correlation type in \code{M$method}. P-values are adjusted across features
#' (separately for each factor) with \code{p.adjust} using \code{M$mtc}, and
#' a feature is flagged significant when its adjusted p-value is less than
#' \code{M$alpha}.
#'
#' @param M a \code{corr_coef} object.
#' @param D a \code{dataset} object.
#' @return \code{M} with the \code{coeff}, \code{p_value} and
#'   \code{significant} outputs set. Each is a data.frame with one row per
#'   column of \code{D$data}. With a single factor the column is named
#'   \code{coeff}, \code{p_value} or \code{significant} respectively; with
#'   several factors there is one column per factor, named after the factor.
#' @export
setMethod(f = "method.apply",
  signature = c("corr_coef", "dataset"),
  definition = function(M, D) {
    # Read everything from the objects once, outside the per-feature loop.
    features <- D$data
    factors <- D$sample_meta[M$factor_names]
    cor_method <- M$method
    mtc_method <- M$mtc

    n_features <- ncol(features)
    n_factors <- length(factors)
    if (n_factors == 0L) {
      stop("corr_coef: no factor names were supplied.", call. = FALSE)
    }

    # Run one correlation test and pull the results out BY NAME. The position
    # of p.value/estimate in the htest object depends on the method (pearson
    # adds a 'parameter' entry), so positional indexing is not safe.
    # cor.test drops incomplete pairs itself, so no NA handling is passed.
    test_pair <- function(feature, factor_values) {
      res <- cor.test(feature, factor_values, method = cor_method)
      c(unname(res$estimate), res$p.value)
    }

    coeff <- matrix(
      NA_real_,
      nrow = n_features,
      ncol = n_factors,
      dimnames = list(colnames(features), names(factors))
    )
    p_value <- coeff

    for (j in seq_len(n_factors)) {
      factor_values <- factors[[j]]
      stats_j <- vapply(
        seq_len(n_features),
        function(i) test_pair(features[, i], factor_values),
        numeric(2)
      )
      coeff[, j] <- stats_j[1, ]
      # multiple test correction is applied across the features of one factor
      p_value[, j] <- p.adjust(stats_j[2, ], method = mtc_method)
    }

    # significant means adjusted p-value BELOW the threshold
    significant <- p_value < M$alpha

    # Convert to data.frame; keep the original single-column names when only
    # one factor was requested so existing callers keep working.
    as_output <- function(values, single_name) {
      out <- as.data.frame(values)
      if (n_factors == 1L) {
        colnames(out) <- single_name
      }
      out
    }

    M$p_value <- as_output(p_value, "p_value")
    M$coeff <- as_output(coeff, "coeff")
    M$significant <- as_output(significant, "significant")

    M
  }
)
| 93 | + |
| 94 | + |
| 95 | + |
| 96 | + |
| 97 | + |
| 98 | + |
| 99 | + |
| 100 | + |
| 101 | + |
0 commit comments