@@ -30,7 +30,9 @@ vignette: >
30
30
``` {r setup, include = FALSE}
31
31
knitr::opts_chunk$set(
32
32
collapse = TRUE,
33
- comment = "#>"
33
+ comment = "#>",
34
+ fig.width=5,
35
+ fig.height=5
34
36
)
35
37
```
36
38
@@ -118,7 +120,7 @@ acceptable level of technical variation where signal correction is not required.
118
120
The following code calculates and plots the RSD% values of the features within
119
121
the dataset.
120
122
121
- ``` {r fig.height=9 , fig.width=5 , message=FALSE, warning=FALSE}
123
+ ``` {r fig.height=5 , fig.width=4 , message=FALSE, warning=FALSE}
122
124
# separate the LCMS data from the meta data
123
125
data(MTBLS79)
124
126
data <- SummarizedExperiment::assay(MTBLS79[feature_names, ])
@@ -160,10 +162,11 @@ ggplot(data=plotdata, aes(x=Class, y=feature, fill=RSD)) +
160
162
161
163
A violin plot is a useful way of summarising the RSD% over all samples/QCs in
162
164
the data set. Note a very high QC sample RSD% value for feature '409.05716'.
163
- ``` {r, fig.width=6, fig.height=6 }
165
+ ``` {r}
164
166
ggplot(data=plotdata, aes(x=Class, y=RSD, fill=Class)) +
165
167
geom_violin(draw_quantiles=c(0.25,0.5,0.75)) +
166
168
ylab("RSD%") +
169
+ guides(fill=FALSE) +
167
170
theme(panel.background=element_blank())
168
171
```
169
172
@@ -173,7 +176,7 @@ and is more similar to the signal variation of the biological samples. We can
173
176
calculate similar statistics per batch and visualise the results with a box
174
177
plot.
175
178
176
- ``` {r message=FALSE, warning=FALSE, fig.width=6, fig. height=6}
179
+ ``` {r message=FALSE, warning=FALSE, fig.height=6}
177
180
# prepare some matrices to store the results
178
181
RSDQC <- matrix(ncol=8, nrow=nrow(data))
179
182
RSDsample <- matrix(ncol=8, nrow=nrow(data))
@@ -211,7 +214,10 @@ plotdata$Class <- as.factor(plotdata$Class)
211
214
ggplot(data=plotdata, aes(x=Class, y=RSD, fill=Class)) + geom_boxplot() +
212
215
facet_wrap(~ batch, ncol=3) +
213
216
ylab("RSD%") +
214
- theme(panel.background=element_blank())
217
+ xlab("") +
218
+ scale_x_discrete(labels=NULL) +
219
+ theme(panel.background=element_blank(), axis.text.x=element_blank(),
220
+ axis.ticks.x=element_blank())
215
221
```
216
222
217
223
** Summary of RSD% of QC samples**
@@ -232,7 +238,7 @@ An alternative measure of QC and biological sample variability is the so called
232
238
D-ratio, which indicates if the technical variation within the QC samples
233
239
exceeds the biological variation within biological samples.
234
240
235
- ``` {r message=FALSE, warning=FALSE, fig.width=6, fig.height=6 }
241
+ ``` {r message=FALSE, warning=FALSE}
236
242
237
243
# prepare a list of colours for plotting
238
244
manual_color = c("#386cb0", "#ef3b2c", "#7fc97f", "#fdb462", "#984ea3",
@@ -291,7 +297,7 @@ spectral features.
291
297
See @guida2016 for a more detailed review on common pre-processing steps and
292
298
methods.
293
299
294
- ``` {r, fig.width=6, fig.height=6 }
300
+ ``` {r, fig.width=6.5 , fig.height=5 }
295
301
pca_data <- MTBLS79[feature_names, ]
296
302
297
303
pca_data <- pqn_normalisation(pca_data, classes=class, qc_label="QC")
@@ -340,7 +346,7 @@ below illustrates the measured signal of QC samples across all 8 batches. To be
340
346
able to compare all 20 features measured at different signal ranges, the data
341
347
will be scaled to unit variance (UV).
342
348
343
- ``` {r message=FALSE, warning=FALSE, fig.height=12, fig.width=6 }
349
+ ``` {r message=FALSE, warning=FALSE, fig.height=10 }
344
350
345
351
# autoscale the QC data
346
352
QCdata <- data[ ,QChits]
@@ -365,7 +371,7 @@ across the eight batches, and that some features are following a similar
365
371
pattern, i.e. they are correlated. We can create a similar plot to the one
366
372
above including linear regression fit between measured data points.
367
373
368
- ``` {r, warning=FALSE, fig.height=12, fig.width=6 }
374
+ ``` {r, warning=FALSE, fig.height=10 }
369
375
ggplot(data=plotdata, aes(x=index, y=intensity, col=batch)) +
370
376
geom_point(size=2) +
371
377
facet_wrap(~ variable, ncol=4) +
@@ -379,7 +385,7 @@ calculate actual correlation values within QC samples for each measured
379
385
feature, and we will use Kendall's * tau* statistic to estimate a rank-based
380
386
measure of association.
381
387
382
- ``` {r message=FALSE, warning=FALSE, fig.height=15, fig.width=10 }
388
+ ``` {r message=FALSE, warning=FALSE, fig.height=7.5 }
383
389
sampleorder <- c(1:ncol(QCdata))
384
390
385
391
correlations <- matrix(ncol=2, nrow=nrow(data))
@@ -516,7 +522,7 @@ ggplot(data=plotdata, aes(x=batch, y=feature, fill=value)) +
516
522
Let's have a closer look to '451.01086' measured feature and how signal
517
523
correction can be applied.
518
524
519
- ``` {r, warning=FALSE, message=FALSE, fig.width=6, fig.height=6 }
525
+ ``` {r, warning=FALSE, message=FALSE}
520
526
data <- data.frame(data=
521
527
as.vector(SummarizedExperiment::assay(MTBLS79["451.01086", ])), batch=batch,
522
528
class=factor(class, ordered=TRUE))
@@ -534,7 +540,7 @@ measured intensities can be observed between analytical batches.
534
540
535
541
Similar plot for QC samples only
536
542
537
- ``` {r, fig.width=6, fig.height=6 }
543
+ ``` {r}
538
544
539
545
QCdata <- data[data$class == "QC",]
540
546
645
651
Now the smoothed spline fit is used to predict values for the biological
646
652
sample for the current batch.
647
653
648
- ``` {r, fig.width=6, fig.height=6 }
654
+ ``` {r}
649
655
valuePredict=predict(sp.obj, order[batch==nb])
650
656
651
657
plotchr <- as.numeric(data$class)
@@ -674,7 +680,7 @@ for signal drift. This can usually be done by subtracting the fitted values
674
680
from the actual measured values for each feature. To avoid getting negative
675
681
values we will add the median value of the feature to the corrected data.
676
682
677
- ``` {r, fig.width=6, fig.height=6, warning=FALSE}
683
+ ``` {r, warning=FALSE}
678
684
fitmedian <- median(plotdata$measured, na.rm=TRUE)
679
685
plotdata$corrected_subt <- (plotdata$measured - plotdata$fitted) + fitmedian
680
686
@@ -697,7 +703,7 @@ An alternative to subtraction of the fitted values is to divide them by the
697
703
median of the fit and use the resulting coefficients to correct the data points.
698
704
The same general relative trends should be observed in either case.
699
705
700
- ``` {r, fig.width=6, fig.height=6, warning=FALSE}
706
+ ``` {r, warning=FALSE}
701
707
plotdata$corrected_div <- plotdata$measured/(plotdata$fitted/fitmedian)
702
708
703
709
plotdata3 <- plotdata[,c("Class", "order", "corrected_subt", "corrected_div")]
@@ -720,7 +726,7 @@ ggplot(data=plotdata3, aes(x=order, y=intensity, color=data, shape=Class)) +
720
726
So far we have applied signal correction for data points within one analytical
721
727
batch. The code below will perform the same steps for each of the 8 batches.
722
728
723
- ``` {r, fig.width=6, fig.height=6, warning=FALSE}
729
+ ``` {r, warning=FALSE}
724
730
725
731
outl <- rep(NA, nrow(data))
726
732
@@ -766,7 +772,7 @@ ggplot(data=plotdata2, aes(x=order, y=log(intensity,10),
766
772
After smoothed spline fit per each batch is calculated, we can apply signal
767
773
correction within each batch.
768
774
769
- ``` {r, fig.width=6, fig.height=6, warning=FALSE}
775
+ ``` {r, warning=FALSE}
770
776
771
777
# median intensity value is used to adjust batch effect
772
778
@@ -807,7 +813,7 @@ First, a grand median is calculated across all batches, and then difference
807
813
between each batch median and the grand median is subtracted from all the
808
814
samples in that batch, to remove the difference.
809
815
810
- ``` {r, fig.width=6, fig.height=6, warning=FALSE}
816
+ ``` {r, warning=FALSE}
811
817
mpa <- rep(NA, nrow(data))
812
818
813
819
for (bch in 1:8) {
@@ -845,7 +851,7 @@ ggplot(data=plotdata2, aes(x=order, y=log(intensity,10),
845
851
846
852
We can calculate RSD% before and after correction.
847
853
848
- ``` {r, fig.width=6, fig.height=6, warning=FALSE}
854
+ ``` {r, warning=FALSE}
849
855
FUN <- function(x) sd(x, na.rm=TRUE)/mean(x, na.rm=TRUE) * 100
850
856
851
857
# RSD% of biological and QC samples within all 6 batches:
@@ -892,7 +898,7 @@ corrected_data <- QCRSC(df=data, order=sample_order, batch=batch,
892
898
We can calculate RSD% statistics per batch before and after correction and
893
899
visualise the results with a box plot.
894
900
895
- ``` {r, fig.width=6, fig.height=6, warning=FALSE}
901
+ ``` {r, warning=FALSE}
896
902
data <- SummarizedExperiment::assay(data)
897
903
corrected_data <- SummarizedExperiment::assay(corrected_data)
898
904
RSDQC <- matrix(ncol=8, nrow=nrow(data))
@@ -952,7 +958,10 @@ plotdata$Class <- as.factor(plotdata$Class)
952
958
ggplot(data=plotdata, aes(x=Class, y=RSD, fill=Class)) + geom_boxplot() +
953
959
facet_wrap(~ batch, ncol=3) +
954
960
ylab("RSD%") +
955
- theme(panel.background=element_blank()) +
961
+ xlab("") +
962
+ scale_x_discrete(labels=NULL) +
963
+ theme(panel.background=element_blank(), axis.text.x=element_blank(),
964
+ axis.ticks.x=element_blank()) +
956
965
scale_y_continuous(limits=c(0, 50))
957
966
958
967
plotdata <- rbind(plotdataBio, plotdataBio_corrected)
@@ -963,7 +972,9 @@ plotdata$Class <- as.factor(plotdata$Class)
963
972
ggplot(data=plotdata, aes(x=Class, y=RSD, fill=Class)) + geom_boxplot() +
964
973
facet_wrap(~ batch, ncol=3) +
965
974
ylab("RSD%") +
966
- theme(panel.background=element_blank())
975
+ xlab("") +
976
+ theme(panel.background=element_blank(), axis.text.x=element_blank(),
977
+ axis.ticks.x=element_blank())
967
978
968
979
```
969
980
0 commit comments