@@ -43,7 +43,7 @@ ttest = function(
43
43
dof = ' entity' ,
44
44
significant = ' entity' ,
45
45
conf_int = ' entity' ,
46
- estimates = ' data.frame '
46
+ estimates = ' entity '
47
47
),
48
48
prototype = list (name = ' t-test' ,
49
49
description = paste0(' A t-test compares the means of two factor levels. ' ,
@@ -85,12 +85,12 @@ ttest = function(
85
85
86
86
t_statistic = entity(name = ' t-statistic' ,
87
87
ontology = ' STATO:0000176' ,
88
- type = ' numeric ' ,
88
+ type = ' data.frame ' ,
89
89
description = ' the value of the calculate statistics which is converted to a p-value when compared to a t-distribution.'
90
90
),
91
91
p_value = entity(name = ' p value' ,
92
92
ontology = ' STATO:0000175' ,
93
- type = ' numeric ' ,
93
+ type = ' data.frame ' ,
94
94
description = ' the probability of observing the calculated t-statistic.'
95
95
),
96
96
dof = entity(name = ' degrees of freedom' ,
@@ -100,7 +100,7 @@ ttest = function(
100
100
),
101
101
significant = entity(name = ' Significant features' ,
102
102
# ontology='STATO:0000069',
103
- type = ' logical ' ,
103
+ type = ' data.frame ' ,
104
104
description = ' TRUE if the calculated p-value is less than the supplied threhold (alpha)'
105
105
),
106
106
conf_int = entity(name = ' Confidence interval' ,
@@ -113,6 +113,11 @@ ttest = function(
113
113
type = ' numeric' ,
114
114
value = 0.95 ,
115
115
max_length = 1
116
+ ),
117
+ estimates = entity(
118
+ name = ' Estimates' ,
119
+ description = ' The group means estimated when computing the t-statistic.' ,
120
+ type = ' data.frame'
116
121
)
117
122
)
118
123
)
@@ -134,64 +139,50 @@ setMethod(f="model_apply",
134
139
135
140
L = levels(y )
136
141
if (length(L )!= 2 ) {
137
- stop(' must have exactly two levels for this implmentation of t-statistic' )
142
+ stop(' must have exactly two levels for this implementation of t-statistic' )
138
143
}
139
144
140
- estimate_name = ' estimate'
141
- if (M $ paired ){
142
- # check that we have a pair for each sample,
143
- # if not then remove
144
- u = unique(D $ sample_meta [[M $ paired_factor ]])
145
- out = character (0 ) # list of sample_id to remove
146
- for (k in u ) {
147
- n = sum(D $ sample_meta [[M $ paired_factor ]]== k ) # number of samples (could be same class)
148
- if (n < 2 ) {
149
- out = c(out ,k )
150
- }
151
- # if we have more than 2 then we need an even number.
152
- if (n %% 2 != 0 ) {
153
- out = c(out ,k )
154
- }
155
- # check we have enough groups (must be two for ttest)
156
- ng = length(unique(D $ sample_meta [[M $ factor_names ]][D $ sample_meta [[M $ paired_factor ]]== k ]))
157
- if (ng != 2 ) {
158
- out = c(out ,k )
159
- }
160
-
161
- }
162
- # D$data=D$data[!(D$sample_meta[[M$paired_factor]] %in% out),]
163
- # D$sample_meta=D$sample_meta[!(D$sample_meta[[M$paired_factor]] %in% out),]
164
- D = D [! (D $ sample_meta [[M $ paired_factor ]] %in% out ),]
165
- y = D $ sample_meta [[M $ factor_names ]]
166
-
167
- # sort the data by sample id so that theyre in the right order for paired ttest
168
- temp = D $ sample_meta [order(D $ sample_meta [[M $ factor_names ]],D $ sample_meta [[M $ paired_factor ]]), ]
169
- D = D [rownames(temp ),]
170
-
171
- # check number per class
172
- # if less then 2 then remove
173
- FF = filter_na_count(threshold = 2 ,factor_name = M $ factor_names )
174
- FF = model_apply(FF ,D )
175
- D = predicted(FF )
176
-
177
- # check equal numbers per class. if not equal then exclude.
178
- IN = rownames(FF $ count )[(FF $ count [,1 ]== FF $ count [,2 ]) & (FF $ count [,1 ]> 2 ) & (FF $ count [,2 ]> 2 )]
179
- D = D [,IN ]
180
145
146
+ if (M $ paired ){
181
147
estimate_name = ' estimate.mean of the differences'
148
+ } else {
149
+ estimate_name = ' estimate'
182
150
}
183
151
184
-
185
-
186
152
X = D $ data
187
153
y = D $ sample_meta [[M $ factor_names ]]
188
154
189
155
output = lapply(X ,function (x ) {
190
156
a = tryCatch({
157
+
158
+ # check for pairs if required
159
+ if (M $ paired ) {
160
+ # get group A
161
+ dfA = data.frame (val = x [y == L [1 ]],id = D $ sample_meta [y == L [1 ],M $ paired_factor ])
162
+ # get group B
163
+ dfB = data.frame (val = x [y == L [2 ]],id = D $ sample_meta [y == L [2 ],M $ paired_factor ])
164
+ # merge
165
+ Z = merge(dfA ,dfB ,by = ' id' ) # will exclude any sample without a matching pair in sample list
166
+ # omit pairs with an NA
167
+ Z = na.omit(Z ) # excludes any pair with at least one NA
168
+
169
+ # check for at least 3 pairs
170
+ if (nrow(Z )< 3 ) {
171
+ stop(' not enough pairs' )
172
+ }
173
+
174
+ # extract for t-stat
175
+ A = Z $ val.x
176
+ B = Z $ val.y
177
+ } else {
178
+ A = x [y == L [1 ]]
179
+ B = x [y == L [2 ]]
180
+ }
181
+
191
182
g = unlist(
192
183
t.test(
193
- x [ y == L [ 1 ]] ,
194
- x [ y == L [ 2 ]] ,
184
+ A ,
185
+ B ,
195
186
paired = M $ paired ,
196
187
var.equal = M $ equal_variance ,
197
188
conf.level = M $ conf_level
@@ -226,11 +217,11 @@ setMethod(f="model_apply",
226
217
output = output [,- 1 ]
227
218
# ensure outputs are in the correct order (TODO: update to data.frame with rownames)
228
219
output = output [CN ,]
229
- output $ p.value = p.adjust(output $ p.value ,method = param_value(M ,' mtc' ))
230
- output_value(M ,' t_statistic' )= output $ statistic.t
231
- output_value(M ,' p_value' )= output $ p.value
220
+ output $ p.value = ' p_value ' = p.adjust(output $ p.value ,method = param_value(M ,' mtc' ))
221
+ output_value(M ,' t_statistic' )= data.frame ( ' t_statistic ' = output $ statistic.t , row.names = CN )
222
+ output_value(M ,' p_value' )= data.frame ( ' p_value ' = output $ p.value , row.names = CN )
232
223
output_value(M ,' dof' )= output $ parameter.df
233
- output_value(M ,' significant' )= output $ p.value < param_value(M ,' alpha' )
224
+ output_value(M ,' significant' )= data.frame ( ' significant ' = output $ p.value < param_value(M ,' alpha' ), row.names = CN )
234
225
M $ conf_int = output [,4 : 5 ,drop = FALSE ]
235
226
colnames(M $ conf_int )= c(' lower' ,' upper' )
236
227
if (M $ paired ) {
@@ -253,9 +244,9 @@ setMethod(f="model_apply",
253
244
# as_data_frame method for ttest objects.
# Collects the per-feature t-test outputs stored on M into a single data.frame:
# the t-statistic, p-value and significance flag, followed by the columns of
# M$estimates and M$conf_int.
# Argument: M, the ttest object whose outputs are read.
# The body ends with an assignment to `out`, so the value of that assignment
# is what the method returns.
setMethod (f="as_data_frame ",
254
245
signature = c(" ttest" ),
255
246
definition = function (M ) {
256
# removed version: the three outputs were passed to data.frame() directly
- out = data.frame (' t_statistic' = M $ t_statistic ,
257
- ' t_p_value' = M $ p_value ,
258
- ' t_significant' = M $ significant )
247
# added version: each output is indexed with [, 1] (its first column) and the
# row names of the result are copied from M$t_statistic
+ out = data.frame (' t_statistic' = M $ t_statistic [, 1 ] ,
248
+ ' t_p_value' = M $ p_value [, 1 ] ,
249
+ ' t_significant' = M $ significant [, 1 ], row.names = rownames( M $ t_statistic ) )
259
250
# append the estimates and confidence-interval columns to the right of the statistics
out = cbind(out ,M $ estimates ,M $ conf_int )
260
251
}
261
252
)
0 commit comments