Skip to content

Commit 611668f

Browse files
committed
fix dratio
- update equation(s) to match paper - add option to use sd or mad - add second version of dratio from paper - add tests
1 parent 106ae6f commit 611668f

File tree

3 files changed

+189
-17
lines changed

3 files changed

+189
-17
lines changed

R/d_ratio_filter_class.R

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,19 @@
44
#' M = dratio_filter(threshold=20,qc_label='QC',factor_name='Class')
55
#' M = model_apply(M,D)
66
#' @export dratio_filter
7-
dratio_filter = function(threshold=20, qc_label='QC', factor_name, ...) {
7+
dratio_filter = function(
8+
threshold=20,
9+
qc_label='QC',
10+
factor_name,
11+
method='ratio',
12+
dispersion='sd',
13+
...) {
814
out=struct::new_struct('dratio_filter',
915
threshold=threshold,
1016
qc_label=qc_label,
1117
factor_name=factor_name,
18+
method=method,
19+
dispersion=dispersion,
1220
...)
1321
return(out)
1422
}
@@ -21,7 +29,9 @@ dratio_filter = function(threshold=20, qc_label='QC', factor_name, ...) {
2129
factor_name='entity',
2230
filtered='entity',
2331
flags='entity',
24-
d_ratio='data.frame'
32+
d_ratio='data.frame',
33+
method='enum',
34+
dispersion='enum'
2535
),
2636
prototype=list(name = 'Dispersion ratio filter',
2737
description = paste0('The dispersion ratio (d-ratio) compares the ',
@@ -33,7 +43,7 @@ dratio_filter = function(threshold=20, qc_label='QC', factor_name, ...) {
3343
'the feature is removed.'),
3444
type = 'filter',
3545
predicted = 'filtered',
36-
.params=c('threshold','qc_label','factor_name'),
46+
.params=c('threshold','qc_label','factor_name','method','dispersion'),
3747
.outputs=c('filtered','flags','d_ratio'),
3848
citations=list(
3949
bibentry(
@@ -75,8 +85,35 @@ dratio_filter = function(threshold=20, qc_label='QC', factor_name, ...) {
7585
description = 'Flag indicating whether the feature was rejected by the filter or not.',
7686
type='data.frame',
7787
value=data.frame()
88+
),
89+
method=enum(
90+
name='dratio method',
91+
description = c(
92+
'ratio' = paste0('Dispersion of the QCs divided by the ',
93+
'dispersion of the samples. Corresponds to Eq 4 in ',
94+
' Broadhurst et al (2018).'),
95+
'euclidean' = paste0('Dispersion of the QCs divided by the ',
96+
'euclidean length of the total dispersion. Total dispersion ',
97+
'is estimated from the QC and Sample dispersion by assuming ',
98+
'that they are orthogonal. Corresponds to Eq 5 in ',
99+
'Broadhurst et al (2018)')),
100+
allowed=c('ratio','euclidean'),
101+
value='ratio',
102+
type='character',
103+
max_length = 1
104+
),
105+
dispersion=enum(
106+
name='Dispersion method',
107+
description = c(
108+
'sd' = paste0('Dispersion is estimated using the ',
109+
'standard deviation.'),
110+
'mad' = paste0('Dispersion is estimated using the median ',
111+
'absolute deviation.')),
112+
allowed=c('sd','mad'),
113+
value='sd',
114+
type='character',
115+
max_length = 1
78116
)
79-
80117
)
81118
)
82119

@@ -86,25 +123,42 @@ setMethod(f="model_train",
86123
signature=c("dratio_filter","DatasetExperiment"),
87124
definition=function(M,D)
88125
{
89-
# mad QC samples
126+
# dispersion QC samples
90127
QC = filter_smeta(
91128
mode='include',
92129
levels=M$qc_label,
93130
factor_name=M$factor_name)
94131
QC = model_apply(QC,D)
95132
QC = predicted(QC)$data
96-
QC = apply(QC,2,mad,na.rm=TRUE)
133+
134+
if (M$dispersion=='mad') {
135+
QC = apply(QC,2,mad,na.rm=TRUE)
136+
} else {
137+
QC = apply(QC,2,sd,na.rm=TRUE)
138+
}
97139

98-
# mad not qc samples
140+
# dispersion (not QC) samples
99141
S = filter_smeta(
100142
mode='exclude',
101143
levels=M$qc_label,
102144
factor_name=M$factor_name)
103145
S = model_apply(S,D)
104146
S = predicted(S)$data
105-
S = apply(S,2,mad,na.rm=TRUE)
147+
148+
if (M$dispersion=='mad') {
149+
S = apply(S,2,mad,na.rm=TRUE) # constant = 1.4826 default
150+
} else {
151+
S = apply(S,2,sd,na.rm=TRUE)
152+
}
106153

107-
d_ratio=(QC/(QC+S))*100
154+
# dispersion ratio
155+
if (M$method=='ratio') {
156+
# eq 4
157+
d_ratio=(QC/S)*100
158+
} else {
159+
# eq 5
160+
d_ratio= (QC / sqrt((QC^2) + (S^2))) * 100
161+
}
108162

109163
OUT=d_ratio>M$threshold
110164

man/dratio_filter.Rd

Lines changed: 12 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 114 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,119 @@
11
# filter d-ratio
2-
test_that('d-ratio filter works as expected',{
2+
test_that('d-ratio filter works as expected using sd ratio',{
33
set.seed('57475')
44
# DatasetExperiment
55
DE = MTBLS79_DatasetExperiment(filtered=TRUE)
66
M = dratio_filter(
77
threshold=20,
88
qc_label = 'QC',
9-
factor_name='Class'
9+
factor_name='Class',
10+
method = 'ratio',
11+
dispersion = 'sd'
12+
)
13+
M = model_apply(M,DE)
14+
15+
# manually compute d-ratio
16+
qc_mad=unlist(
17+
lapply(DE$data[DE$sample_meta$Class=='QC',],sd,na.rm=TRUE)
18+
)
19+
sa_mad=unlist(
20+
lapply(DE$data[DE$sample_meta$Class!='QC',],sd,na.rm=TRUE)
21+
)
22+
dr = (qc_mad/(sa_mad))*100
23+
24+
# check manual vs fcn
25+
expect_true(all(dr==M$d_ratio$d_ratio))
26+
27+
# check values havent changed
28+
expect_equal(dr[[1]],23.05762,tolerance = 5e-6)
29+
expect_equal(dr[[100]],64.49242,tolerance = 5e-6)
30+
31+
# just number of filtered columns
32+
expect_true(ncol(predicted(M))==725)
33+
expect_true(ncol(DE)-ncol(predicted(M))==854)
34+
expect_true(ncol(DE)-ncol(predicted(M))==sum(dr>20))
35+
})
36+
37+
test_that('d-ratio filter works as expected using mad ratio',{
38+
set.seed('57475')
39+
# DatasetExperiment
40+
DE = MTBLS79_DatasetExperiment(filtered=TRUE)
41+
M = dratio_filter(
42+
threshold=20,
43+
qc_label = 'QC',
44+
factor_name='Class',
45+
method = 'ratio',
46+
dispersion = 'mad'
47+
)
48+
M = model_apply(M,DE)
49+
50+
# manually compute d-ratio
51+
qc_mad=unlist(
52+
lapply(DE$data[DE$sample_meta$Class=='QC',],mad,na.rm=TRUE)
53+
)
54+
sa_mad=unlist(
55+
lapply(DE$data[DE$sample_meta$Class!='QC',],mad,na.rm=TRUE)
56+
)
57+
dr = (qc_mad/(sa_mad))*100
58+
59+
# check manual vs fcn
60+
expect_true(all(dr==M$d_ratio$d_ratio))
61+
62+
# check values havent changed
63+
expect_equal(dr[[1]],14.14758,tolerance = 5e-6)
64+
expect_equal(dr[[100]],48.23871,tolerance = 5e-6)
65+
66+
# just number of filtered columns
67+
expect_true(ncol(predicted(M))==834)
68+
expect_true(ncol(DE)-ncol(predicted(M))==745)
69+
expect_true(ncol(DE)-ncol(predicted(M))==sum(dr>20))
70+
})
71+
72+
test_that('d-ratio filter works as expected using sd euclidean',{
73+
set.seed('57475')
74+
# DatasetExperiment
75+
DE = MTBLS79_DatasetExperiment(filtered=TRUE)
76+
M = dratio_filter(
77+
threshold=20,
78+
qc_label = 'QC',
79+
factor_name='Class',
80+
method = 'euclidean',
81+
dispersion = 'sd'
82+
)
83+
M = model_apply(M,DE)
84+
85+
# manually compute d-ratio
86+
qc_mad=unlist(
87+
lapply(DE$data[DE$sample_meta$Class=='QC',],sd,na.rm=TRUE)
88+
)
89+
sa_mad=unlist(
90+
lapply(DE$data[DE$sample_meta$Class!='QC',],sd,na.rm=TRUE)
91+
)
92+
dr = qc_mad/sqrt((qc_mad^2) + (sa_mad^2))*100
93+
94+
# check manual vs fcn
95+
expect_true(all(dr==M$d_ratio$d_ratio))
96+
97+
# check values havent changed
98+
expect_equal(dr[[1]],22.46809,tolerance = 5e-6)
99+
expect_equal(dr[[100]],54.19862,tolerance = 5e-6)
100+
101+
# just number of filtered columns
102+
expect_true(ncol(predicted(M))==747)
103+
expect_true(ncol(DE)-ncol(predicted(M))==832)
104+
expect_true(ncol(DE)-ncol(predicted(M))==sum(dr>20))
105+
})
106+
107+
test_that('d-ratio filter works as expected using mad euclidean',{
108+
set.seed('57475')
109+
# DatasetExperiment
110+
DE = MTBLS79_DatasetExperiment(filtered=TRUE)
111+
M = dratio_filter(
112+
threshold=20,
113+
qc_label = 'QC',
114+
factor_name='Class',
115+
method = 'euclidean',
116+
dispersion = 'mad'
10117
)
11118
M = model_apply(M,DE)
12119

@@ -17,17 +124,17 @@ test_that('d-ratio filter works as expected',{
17124
sa_mad=unlist(
18125
lapply(DE$data[DE$sample_meta$Class!='QC',],mad,na.rm=TRUE)
19126
)
20-
dr = (qc_mad/(qc_mad+sa_mad))*100
127+
dr = qc_mad/sqrt((qc_mad^2) + (sa_mad^2))*100
21128

22129
# check manual vs fcn
23130
expect_true(all(dr==M$d_ratio$d_ratio))
24131

25132
# check values havent changed
26-
expect_equal(dr[[1]],12.39,tolerance = 1e3)
27-
expect_equal(dr[[100]],32.54,tolerance = 1e3)
133+
expect_equal(dr[[1]],14.00808,tolerance = 5e-6)
134+
expect_equal(dr[[100]],43.44776,tolerance = 5e-6)
28135

29136
# just number of filtered columns
30-
expect_true(ncol(predicted(M))==1052)
31-
expect_true(ncol(DE)-ncol(predicted(M))==527)
137+
expect_true(ncol(predicted(M))==850)
138+
expect_true(ncol(DE)-ncol(predicted(M))==729)
32139
expect_true(ncol(DE)-ncol(predicted(M))==sum(dr>20))
33140
})

0 commit comments

Comments
 (0)