Skip to content

Commit a3cc43d

Browse files
authored
Add files via upload
0 parents  commit a3cc43d

File tree

4 files changed

+893
-0
lines changed

4 files changed

+893
-0
lines changed

Fiberomics_EnrichmentFig5.Rmd

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
---
2+
title: "R Notebook"
3+
output: html_notebook
4+
editor_options:
5+
chunk_output_type: console
6+
---
7+
8+
```{r}
9+
pacman::p_load("tidyverse","readxl","stringr","limma", "edgeR", "gplots", "ggplot2","ComplexHeatmap","dendextend","vsn", "circlize", "Homo.sapiens", "AnnotationDbi", "tidyHeatmap", "openxlsx", "enrichplot", "clusterProfiler")
10+
```
11+
12+
#Need to run Fibreomics_PostFibreComparisons first
13+
14+
```{r}
15+
all_genes_M <- comp_M_POST_IvII$uniprot
16+
diff_genes_M <- subset(comp_M_POST_IvII, adj.P.Val < 0.05)
17+
diff_genes_M <- diff_genes_M$uniprot
18+
19+
all_genes_S <- comp_S_POST_IvII$uniprot
20+
diff_genes_S <- subset(comp_M_POST_IvII, adj.P.Val < 0.05)
21+
diff_genes_S <- diff_genes_S$uniprot
22+
```
23+
24+
```{r}
25+
mydf_testM <- comp_M_POST_IvII
26+
mydf_testM <- mydf_testM[c(1,2)]
27+
mydf_testM$exercise <- "MICT"
28+
mydf_testM$fibre <- "I"
29+
mydf_testM$fibre[(mydf_testM$logFC) > 0] <- "II"
30+
31+
32+
mydf_testS <- comp_S_POST_IvII
33+
mydf_testS <- mydf_testS[c(1,2)]
34+
mydf_testS$exercise <- "SIT"
35+
mydf_testS$fibre <- "I"
36+
mydf_testS$fibre[(mydf_testS$logFC) > 0] <- "II"
37+
38+
merged <- rbind(mydf_testS, mydf_testM)
39+
40+
enrichkey <- merged$uniprot
41+
enrichdict <- AnnotationDbi::select(Homo.sapiens, keys=key, columns=c("ENTREZID", "SYMBOL", "GENEID"), keytype = "UNIPROT")
42+
enrichdict <- enrichdict[!duplicated(enrichdict$UNIPROT), ]
43+
enrichmerge<- merge(merged, enrichdict, by.x = "uniprot", by.y = "UNIPROT")
44+
45+
```
46+
47+
```{r}
48+
comp_M_POST_IvII_posthoc$uniprot <- rownames(comp_M_POST_IvII_posthoc)
49+
mydf_testM <- comp_M_POST_IvII_posthoc
50+
mydf_testM <- mydf_testM[c(1,7)]
51+
mydf_testM$exercise <- "MICT"
52+
mydf_testM$fibre <- "I"
53+
mydf_testM$fibre[(mydf_testM$logFC) > 0] <- "II"
54+
55+
comp_S_POST_IvII_posthoc$uniprot <- rownames(comp_S_POST_IvII_posthoc)
56+
mydf_testS <- comp_S_POST_IvII_posthoc
57+
mydf_testS <- mydf_testS[c(1,7)]
58+
mydf_testS$exercise <- "SIT"
59+
mydf_testS$fibre <- "I"
60+
mydf_testS$fibre[(mydf_testS$logFC) > 0] <- "II"
61+
62+
merged <- rbind(mydf_testS, mydf_testM)
63+
64+
enrichkey <- merged$uniprot
65+
enrichdict <- AnnotationDbi::select(Homo.sapiens, keys=key, columns=c("ENTREZID", "SYMBOL"), keytype = "UNIPROT")
66+
enrichdict <- enrichdict[!duplicated(enrichdict$UNIPROT), ]
67+
enrichmerge<- merge(merged, enrichdict, by.x = "uniprot", by.y = "UNIPROT")
68+
69+
```
70+
71+
```{r}
72+
formula_res2 <- compareCluster(ENTREZID~exercise+fibre, data=enrichmerge, fun="enrichWP", organism = "Homo sapiens")
73+
74+
dotplot(formula_res2, showCategory=15, by = "count") + facet_grid(.~exercise, scale="free")
75+
```
76+
77+
## functional annotation test
78+
79+
```{r}
80+
OG <- formula_res2@compareClusterResult
81+
testdata <- formula_res2@compareClusterResult
82+
83+
#subset to test
84+
#testdata <- testdata[c(7:8),]
85+
testdata <- as.data.frame(testdata)
86+
testdata2 <- testdata[c(2,3,5,11)]
87+
88+
nmax <- max(stringr::str_count(testdata2$geneID, "/")) + 1
89+
testdata2 <- tidyr::separate(testdata2, geneID, paste0("col", seq_len(nmax)), sep = "/", fill = "right")
90+
91+
testdata3 <- testdata2 %>% gather(Column, geneID, -"Description", -"exercise", -"fibre")
92+
testdata3 <- na.omit(testdata3)
93+
testdata3 <- testdata3 %>% unite("Cluster", 1:2, sep = ".")
94+
95+
#info
96+
testinfo <- enrichmerge
97+
testinfo <- testinfo %>% unite("Cluster", 3:4, sep = ".")
98+
99+
testmerge <- merge(testinfo, testdata3, by.x=c("ENTREZID", "Cluster"), by.y = c("geneID","Cluster"))
100+
101+
avgtest <- testmerge %>% group_by(Description, Cluster) %>% summarize(avglog = mean(logFC))
102+
103+
avgmerge <- merge(avgtest, testdata, by=c("Description", "Cluster" ))
104+
avgmerge <- as.data.frame(avgmerge)
105+
avgmerge <- avgmerge[order(avgmerge$Cluster),]
106+
107+
formula_res2@compareClusterResult <- avgmerge
108+
109+
dotplot(formula_res2, showCategory = 10, color = "avglog", by = "count") + scale_color_gradient2(low = "blue",high = "red")
110+
111+
112+
geneidtosymbol <- avgmerge
113+
geneidtosymbol <- geneidtosymbol[c(1,2,12)]
114+
nmax <- max(stringr::str_count(geneidtosymbol$geneID, "/")) + 1
115+
geneidtosymbol <- tidyr::separate(geneidtosymbol, geneID, paste0("col", seq_len(nmax)), sep = "/", fill = "right")
116+
geneidtosymbol <- geneidtosymbol %>% gather(Column, geneID, -"Description", -"Cluster")
117+
geneidtosymbol <- na.omit(geneidtosymbol)
118+
geneidtosymbol <- merge(geneidtosymbol, testinfo, by.x = c("geneID","Cluster"), by.y = c("ENTREZID","Cluster"))
119+
geneidtosymbol <- geneidtosymbol[c(2,3,7)]
120+
geneidtosymbol2 <- geneidtosymbol %>% group_by(Description, Cluster) %>% summarise(SYMBOL = paste(unique(SYMBOL), collapse = "/" ))
121+
122+
123+
mergedsymbols <- merge(geneidtosymbol2, avgmerge)
124+
#dropgeneID
125+
mergedsymbols <- mergedsymbols[-c(13)]
126+
colnames(mergedsymbols)[3] <- "geneID"
127+
128+
formula_res2@compareClusterResult <- mergedsymbols
129+
130+
cnetplot(formula_res2, showCategory = c("Aerobic glycolysis", "Amino acid metabolism", "Striated muscle contraction pathway", "Fatty acid beta-oxidation", "Glycogen synthesis and degradation", "Glycolysis and gluconeogenesis", "Cori cycle", "TCA cycle (aka Krebs or citric acid cycle)"))
131+
132+
```
133+

Fiberomics_Normalisation.Rmd

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
---
2+
title: "combattrial_liz"
3+
author: "nikeisha"
4+
date: "23/10/2020"
5+
output: html_document
6+
editor_options:
7+
chunk_output_type: console
8+
---
9+
10+
```{r}
11+
pacman::p_load("dplyr", "tidyverse","readxl","stringr","limma", "edgeR", "gplots", "ggplot2","ComplexHeatmap","dendextend", "imputeLCMD","vsn", "circlize", "COMBAT", "sva")
12+
13+
```
14+
15+
```{r}
16+
# read input file
17+
file_input <- file.choose()
18+
19+
raw_data <-read_xlsx(file_input)
20+
dim(raw_data)
21+
head(raw_data)
22+
23+
high_confi_no_contam <- raw_data %>%
24+
#remove low and mid confidence proteins and contaminants
25+
filter(`Protein FDR Confidence: Combined` == "High", Contaminant =="FALSE", `# Unique Peptides` > 1) %>%
26+
# select normalized abundance reporter ion intensity
27+
dplyr::select(Accession, Description, starts_with("Abundanc")) %>%
28+
#remove empty 3 channels
29+
dplyr::select(!contains(c("127C", "130N", "133N")))
30+
31+
32+
dim(high_confi_no_contam)
33+
34+
#save the accession, but remove from data frame
35+
accession <- high_confi_no_contam [1:2]
36+
data_only <- as.data.frame(high_confi_no_contam[3:106])
37+
#attach accession as the row names of the data
38+
row.names(data_only) <- accession$Accession
39+
head(data_only)
40+
dim(data_only)
41+
42+
#plot non-normalized data
43+
boxplot(log2(data_only), col = rep(c('red', 'green', 'blue', "magenta", "pink","black", "light blue", "light green"), each =13), notch = TRUE)
44+
45+
plotDensities(log2(data_only), col = rep(c("red", "green", "blue", "pink","orange","yellow","purple","grey"), each = 13), main = "Raw MQ data", legend = FALSE)
46+
47+
```
48+
49+
```{r}
50+
#bring in sample matrix
51+
#load sample data
52+
sampledata <- read_tsv("data/sampledata_fibres.txt", col_names = TRUE)
53+
54+
sampledata <- as.data.frame(sampledata)
55+
rownames(sampledata) <- sampledata$newsamplename
56+
sampledata <- sampledata %>% separate(newsamplename, c("subject", "training", "time", "fibre"))
57+
```
58+
59+
```{r}
60+
#There were issues with the reference channels and so they were removed, and normalisation below was introduced to compensate.
61+
62+
data_only2 <- data_only %>%
63+
# select normalized abundance reporter ion intensity
64+
dplyr::select(starts_with("Abundanc")) %>%
65+
#remove reference channels
66+
dplyr::select(!contains(c("126")))
67+
68+
boxplot(log2(data_only2), col = rep(c("red", "green", "blue", "pink","orange","yellow","purple","grey"), each =12), notch = TRUE, horizontal = TRUE)
69+
70+
plotDensities(log2(data_only2), col = rep(c("red", "green", "blue", "pink","orange","yellow","purple","grey"), each = 13), main = "Raw MQ data")
71+
72+
#remove repeats and bad samples
73+
data_only2 <- data_only2[-c(47,48,88,90,61,87)]
74+
```
75+
76+
#presence/abs graph
77+
```{r}
78+
presabs <- data_only2
79+
presabs[presabs > 0] <- 1
80+
presabs[is.na(presabs)] <- 0
81+
82+
Heatmap(presabs, show_row_names = FALSE, show_row_dend = FALSE)
83+
```
84+
85+
```{r}
86+
#remove large missing data and impute the rest
87+
data_raw<- data_only2
88+
data_raw <- data_raw[-which(rowMeans(is.na(data_raw)) > 0.30),]
89+
data_raw <- as.matrix(data_raw)
90+
data_raw_impute <- impute.knn(data_raw)
91+
data_raw_impute <- data_raw_impute[[1]]
92+
93+
data_raw <- as.data.frame(data_raw_impute)
94+
```
95+
96+
```{r}
97+
#still do SL norm first
98+
# check the column totals (per channel sums)
99+
format(round(colSums(data_raw, na.rm = TRUE), digits = 0), big.mark = ",")
100+
101+
# sample loading normalization
102+
exp1_raw <- data_raw[c(1:12)]
103+
exp2_raw <- data_raw[c(13:24)]
104+
exp3_raw <- data_raw[c(25:36)]
105+
exp4_raw <- data_raw[c(37:46)]
106+
exp5_raw <- data_raw[c(47:58)]
107+
exp6_raw <- data_raw[c(59:69)]
108+
exp7_raw <- data_raw[c(70:81)]
109+
exp8_raw <- data_raw[c(82:90)]
110+
111+
data_no_na<- cbind(exp1_raw,exp2_raw,exp3_raw,exp4_raw,exp5_raw,exp6_raw,exp7_raw, exp8_raw)
112+
data_no_na<- na.omit(data_no_na)
113+
dim(data_no_na)
114+
exp1_raw <- data_raw[c(1:12)]
115+
exp2_raw <- data_raw[c(13:24)]
116+
exp3_raw <- data_raw[c(25:36)]
117+
exp4_raw <- data_raw[c(37:46)]
118+
exp5_raw <- data_raw[c(47:58)]
119+
exp6_raw <- data_raw[c(59:69)]
120+
exp7_raw <- data_raw[c(70:81)]
121+
exp8_raw <- data_raw[c(82:90)]
122+
123+
exp1_raw <- exp1_for_sl
124+
exp2_raw <- exp2_for_sl
125+
exp3_raw <- exp3_for_sl
126+
exp4_raw <- exp4_for_sl
127+
exp5_raw <- exp5_for_sl
128+
exp6_raw <- exp6_for_sl
129+
exp7_raw <- exp7_for_sl
130+
exp8_raw <- exp8_for_sl
131+
132+
# first basic normalization is to adjust each TMT experiment to equal signal per channel
133+
# figure out the global scaling value
134+
target <- mean(c(colSums(exp1_raw,na.rm = T), colSums(exp2_raw,na.rm = T), colSums(exp3_raw,na.rm = T), colSums(exp4_raw,na.rm = T),
135+
colSums(exp5_raw,na.rm = T),colSums(exp6_raw,na.rm = T),colSums(exp7_raw,na.rm = T),colSums(exp8_raw,na.rm = T)))
136+
# do the sample loading normalization before the IRS normalization
137+
# there is a different correction factor for each column
138+
norm_facs <- target / colSums(exp1_raw, na.rm = T)
139+
exp1_sl <- sweep(exp1_raw, 2, norm_facs, FUN = "*")
140+
norm_facs <- target / colSums(exp2_raw, na.rm = T)
141+
exp2_sl <- sweep(exp2_raw, 2, norm_facs, FUN = "*")
142+
norm_facs <- target / colSums(exp3_raw, na.rm = T)
143+
exp3_sl <- sweep(exp3_raw, 2, norm_facs, FUN = "*")
144+
norm_facs <- target / colSums(exp4_raw, na.rm = T)
145+
exp4_sl <- sweep(exp4_raw, 2, norm_facs, FUN = "*")
146+
norm_facs <- target / colSums(exp5_raw, na.rm = T)
147+
exp5_sl <- sweep(exp5_raw, 2, norm_facs, FUN = "*")
148+
norm_facs <- target / colSums(exp6_raw, na.rm = T)
149+
exp6_sl <- sweep(exp6_raw, 2, norm_facs, FUN = "*")
150+
norm_facs <- target / colSums(exp7_raw, na.rm = T)
151+
exp7_sl <- sweep(exp7_raw, 2, norm_facs, FUN = "*")
152+
norm_facs <- target / colSums(exp8_raw, na.rm = T)
153+
exp8_sl <- sweep(exp8_raw, 2, norm_facs, FUN = "*")
154+
155+
# make a pre-IRS data frame after sample loading normalizations
156+
data_sl <- cbind(exp1_sl, exp2_sl, exp3_sl, exp4_sl, exp5_sl, exp6_sl, exp7_sl, exp8_sl)
157+
158+
boxplot(log2(data_sl), col = rep(c("red", "green", "blue", "pink","orange","yellow","purple","grey"), each = 12), notch = TRUE)
159+
plotDensities(log2(data_sl), col = rep(c("red", "green", "blue", "pink","orange","yellow","purple","grey"), each = 12), main = "Raw MQ data")
160+
```
161+
162+
163+
```{r}
164+
#combat step
165+
sampledata$training <- factor(sampledata$training)
166+
mod <- model.matrix(~ training, data = sampledata)
167+
batch <- sampledata$batch
168+
169+
# run ComBat as alternative to IRS
170+
# NOTE: SL norm is probably better to do before applying the batch corection
171+
data_combat <- ComBat_seq(counts = as.matrix(data_sl), batch = batch)
172+
data_combat <- as.data.frame(data_combat)
173+
par(mfrow = c(1, 1)) # any plotting in the ComBat call leaves plots as 2x2
174+
175+
# ComBat introduces some negative corrected counts that we need to fix
176+
#data_combat <- data_combat[apply(data_combat, 1, function(x) all(x >= 0)), ]
177+
178+
boxplot(log2(data_combat), notch = TRUE, col = rep(c("red", "green", "blue", "pink","orange","yellow","purple","grey"), each = 12),
179+
main = "ComBat batch correction of SL data\nExp1 (red), Exp2 (green), Exp3 (blue)",
180+
xlab = 'TMT Sample', ylab = 'log2 of Intensity')
181+
182+
plotDensities(log2(data_combat), col = rep(c("red", "green", "blue", "pink","orange","yellow","purple","grey"), 12), main = "ComBat data")
183+
```
184+
185+
186+
```{r}
187+
#apply TMM afterwards
188+
format(round(colSums(data_combat), digits = 0), big.mark = ",")
189+
190+
# apply TMM normalization to the ComBat-corrected data
191+
combat_tmm <- calcNormFactors(data_combat)
192+
data_combat_tmm <- sweep(data_combat, 2, combat_tmm, FUN = "/")
193+
194+
# look at the box plots
195+
boxplot(log2(data_combat_tmm), notch = TRUE, col = rep(c("red", "green", "blue", "pink","orange","yellow","purple","grey"), each = 12),
196+
main = "ComBat batch corrected with TMM\nExp1 (red), Exp2 (green), Exp3 (blue)",
197+
xlab = 'TMT Sample', ylab = 'log2 of Intensity')
198+
199+
# can also look at density plots (like a distribution histogram)
200+
plotDensities(log2(data_combat_tmm), col = rep(c("red", "green", "blue", "pink","orange","yellow","purple","grey"), 12), main = "ComBat/TMM data")
201+
202+
203+
# ggsave(
204+
# "results/densityplot.pdf",
205+
# plotDensities(log2(data_combat_tmm), col = rep(c("red", "green", "blue", "pink","orange","yellow","purple","grey"), 12), main = "ComBat/TMM data", legend = FALSE),
206+
# width = 10,
207+
# height = 9,
208+
# dpi = 300,
209+
# useDingbats=FALSE
210+
# )
211+
```
212+
213+

0 commit comments

Comments
 (0)