Skip to content

Commit f2aa19c

Browse files
author
Sebastian Kurscheid
committed
repairing commit history
1 parent 603b2a3 commit f2aa19c

17 files changed

+1247
-23
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.gz filter=lfs diff=lfs merge=lfs -text
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"folders": [
3+
{
4+
"path": "."
5+
}
6+
],
7+
"settings": {}
8+
}

R/BIOGRID-ALL-3.5.170.tab2.txt.gz

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:2bcadf92dfb8563fadcf4750db699e174917163986bb25514a650d43f422af71
3+
size 70731142
Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
## local functions
2+
median.quartile <- function(x){
3+
out <- quantile(x, probs = c(0.25,0.5,0.75))
4+
names(out) <- c("ymin","y","ymax")
5+
return(out)
6+
}
7+
8+
9+
dataDir <- "/home/sebastian/Data/Collaborations/FSU/PromoterSeqCap/sortingTSVS for Tremethick paper /Figure 1/"
10+
l1 <- lapply(list.files(path = dataDir, pattern=".tsv"), function(x){
11+
dt <- data.table::fread(paste(dataDir, x, sep = "/"))
12+
return(dt)
13+
})
14+
n1 <- gsub(x = list.files(path = dataDir, pattern=".tsv"), pattern = ".tsv", replacement = "")
15+
names(l1) <- unlist(lapply(strsplit(n1, "_"), function(x) paste(x[2:3], collapse = "_")))
16+
17+
l1 <- lapply(l1, function(x) {
18+
ucscID <- unlist(lapply(strsplit(x$gene, ";"), function(x) x[4]))
19+
extGene <- unlist(lapply(strsplit(x$gene, ";"), function(x) x[6]))
20+
x$ucscID <- ucscID
21+
x$extGene <- extGene
22+
x$chr <- unlist(lapply(strsplit(x$gene, ";"), function(x) x[1]))
23+
x$start <- unlist(lapply(strsplit(x$gene, ";"), function(x) x[2]))
24+
x$end <- unlist(lapply(strsplit(x$gene, ";"), function(x) x[3]))
25+
return(x)
26+
})
27+
l1
28+
gr.promoters <- GenomicRanges::makeGRangesFromDataFrame(l1$Total_10A, ignore.strand = T, keep.extra.columns = T)
29+
30+
rm(dt1)
31+
dt1 <- merge(subset(l1$Total_10A, select = c("extGene", "group1")), subset(l1$Total_TGFb, select = c("extGene", "group1")), by.x = "extGene", by.y = "extGene")
32+
colnames(dt1)[2:3] <- c("MCF10A_WT", "MCF10A_TGFb")
33+
dt1 <- merge(dt1, subset(l1$Total_shZ, select = c("extGene", "group1")), by.x = "extGene", by.y = "extGene")
34+
dt1 <- merge(dt1, subset(l1$Total_CA1a, select = c("extGene", "group1")), by.x = "extGene", by.y = "extGene")
35+
colnames(dt1)[4:5] <- c("MCF10A_shZ", "MCF10CA1A")
36+
colnames(dt1)
37+
38+
39+
# For input (figure 1)
40+
# We have decided on the following classes
41+
# 1. Strong +2 (1 for 10A, 1 for TGFb, 1 for shH2AZ, 1 for CAC1)
42+
# 2. Strong +1 (2 for 10A, 2 for TGFb, 2 for shH2AZ, 2 for CAC1)
43+
# 3. weak upstream (3 for 10A, 5 for TGFb, 5 for shH2AZ, 5 for CAC1)
44+
# 4. weak array across promoter (4 for 10A, 7 for TGFb, 7 for shH2AZ, 7 for CAC1)
45+
# 5. Strong -2 (5 for 10A, 6 for TGFb, 6 for shH2AZ, 6 for CAC1)
46+
# 6. Strong -1 (6 for 10A, 4 for TGFb, 4 for shH2AZ, 4 for CAC1)
47+
# 7. Overall depleted (7 for 10A, 3 for TGFb, 3 for shH2AZ, 3 for CAC1)
48+
49+
mcf10awtCategories <- data.table::data.table(cat = c("Strong +2", "Strong +1", "weak upstream", "weak array across promoter", "Strong -2", "Strong -1", "Overall depleted"),
50+
group = c(1,2,3,4,5,6,7))
51+
52+
mcf10atgfbCategories <- data.table::data.table(cat = c("Strong +2", "Strong +1", "weak upstream", "weak array across promoter", "Strong -2", "Strong -1", "Overall depleted"),
53+
group = c(1,2,5,7,6,4,3))
54+
55+
mcf10ashh2azCategories <- data.table::data.table(cat = c("Strong +2", "Strong +1", "weak upstream", "weak array across promoter", "Strong -2", "Strong -1", "Overall depleted"),
56+
group = c(1,2,5,7,6,4,3))
57+
58+
mcf10cac1Categories <- data.table::data.table(cat = c("Strong +2", "Strong +1", "weak upstream", "weak array across promoter", "Strong -2", "Strong -1", "Overall depleted"),
59+
group = c(1,2,5,7,6,4,3))
60+
61+
dt1$MCF10A_WT.category <- as.factor(mcf10awtCategories$cat[match(dt1$MCF10A_WT, mcf10awtCategories$group)])
62+
dt1$MCF10A_TGFb.category <- as.factor(mcf10atgfbCategories$cat[match(dt1$MCF10A_TGFb, mcf10atgfbCategories$group)])
63+
dt1$MCF10A_shZ.category <- as.factor(mcf10ashh2azCategories$cat[match(dt1$MCF10A_shZ, mcf10ashh2azCategories$group)])
64+
dt1$MCF10CA1A.category <- as.factor(mcf10cac1Categories$cat[match(dt1$MCF10CA1A, mcf10cac1Categories$group)])
65+
66+
ggparallel::ggparallel(list("MCF10A_WT.category", "MCF10A_TGFb.category"), #, "MCF10A_shZ.category", "MCF10CA1A.category"),
67+
data = dt1,
68+
label.size = 4,
69+
text.angle = 0, label.colour= "black", same.level = F)
70+
71+
ggparallel::ggparallel(list("MCF10A_WT.category", "MCF10A_shZ.category"), #, , "MCF10CA1A.category"),
72+
data = dt1,
73+
label.size = 4,
74+
text.angle = 0, label.colour= "black", same.level = F)
75+
76+
ggparallel::ggparallel(list("MCF10A_WT.category", "MCF10CA1A.category"), #, , ),
77+
data = dt1,
78+
label.size = 4,
79+
text.angle = 0, label.colour= "black", same.level = F)
80+
81+
factor(dt1$MCF10A_TGFb.category)
82+
83+
table(dt1$MCF10A_WT.category)
84+
table(dt1$MCF10A_TGFb.category)
85+
table("WT" = dt1$MCF10A_WT.category, "TGFb" = dt1$MCF10A_TGFb.category)
86+
table("WT" = dt1$MCF10A_WT.category, "shZ" = dt1$MCF10A_shZ.category)
87+
table("WT" = dt1$MCF10A_WT.category, "WT" = dt1$MCF10A_WT.category)
88+
table("TGFb" = dt1$MCF10A_TGFb.category, "TGFb" = dt1$MCF10A_TGFb.category)
89+
90+
levels(dt1$MCF10A_shZ.category)
91+
92+
## gather table
93+
Fig2Categories <- dt1[, c("extGene", "MCF10A_WT.category", "MCF10A_TGFb.category", "MCF10A_shZ.category", "MCF10CA1A.category")]
94+
Fig2Categories <- data.table(gather(Fig2Categories, key = "condition", value = "category", c("MCF10A_WT.category", "MCF10A_TGFb.category", "MCF10A_shZ.category", "MCF10CA1A.category")))
95+
Fig2Categories$condition <- unlist(lapply(strsplit(Fig2Categories$condition, "\\."), function(x) x[1]))
96+
Fig2Categories$category <- factor(Fig2Categories$category, levels = levels(as.factor(Fig2Categories$category))[c(5,4,7,6,3,2,1)])
97+
98+
deGenesTGFbIDs <- deGenesTGFb[deGenesTGFb$qval < 0.1]$target_id
99+
deGenesTGFbUpIDs <- deGenesTGFb[deGenesTGFb$qval < 0.1 & deGenesTGFb$b > 0]$target_id
100+
deGenesTGFbDownIDs <- deGenesTGFb[deGenesTGFb$qval < 0.1 & deGenesTGFb$b < 0]$target_id
101+
102+
left <- "MCF10A_WT"
103+
right <- "MCF10A_TGFb"
104+
left.categories <- llist
105+
left.categories <- c("Overall depleted", "weak array across promoter")
106+
df1 <- data.table(Fig2Categories[condition == left]$extGene, Fig2Categories[condition == left]$category, Fig2Categories[condition == right]$category)
107+
colnames(df1) <- c("extGene", left, right)
108+
#df1 <- df1[extGene %in% deGenesTGFbUpIDs]
109+
#df1 <- df1[extGene %in% deGenesTGFbDownIDs]
110+
df1 <- df1[extGene %in% sigEMTCells$cellLine_sig]
111+
112+
df1 <- df2["epi"]
113+
data <- data.frame(df1[ df1[[left]] %in% left.categories , ])
114+
ggparallel::ggparallel(vars = list(left, right), data = data, same.level = T, label.size = 3,
115+
text.angle = 0, label.colour= "black") + ggtitle("Epithelial")
116+
117+
geneIDs <- data$extGene
118+
s1 <- which(kt1D6$target_id %in% geneIDs)
119+
s2 <- which(deGenesTGFb$target_id %in% sigEMTCells$cellLine_sig)
120+
s2
121+
plot(deGenesTGFb[s2]$b, -log10(deGenesTGFb[s2]$qval))
122+
123+
bp1 <- ggplot(data = kt1D6[s1],
124+
aes(x= condition, y = log2(tpm + 1), colour = condition)) + geom_boxplot()
125+
bp1
126+
bp2 <- ggplot(data =m3[m3[geneIDs][condition == "MCF10A_wt"][category %in% "Strong +1"]$target_id],
127+
aes(x= condition, y = log2(tpm + 1), colour = condition)) + geom_boxplot() + facet_wrap(~category)
128+
129+
bp2
130+
vp1 <- ggplot(data = kt1D6[s1],
131+
aes(x= condition, y = log2(tpm + 1)), fill = condition) + geom_violin()
132+
vp1
133+
h1 <- ggplot(data = kt1D6[s1],
134+
aes(log2(tpm + 1), ..density.., fill = condition)) + geom_histogram(binwidth = 0.11, alpha = 0.5)
135+
h1
136+
137+
###
138+
data <- as.data.frame(df1)
139+
vars <- unlist(list(left, right))
140+
llist <- NULL
141+
same.level = T
142+
for (i in unique(vars)) {
143+
if (!same.level) levels(data[,i]) <- paste(i, levels(data[,i]), sep=":")
144+
llist <- unique(c(llist, levels(data[,i])))
145+
}
146+
(llist)
147+
## integrate expression data
148+
kt1 <- results$full$kallisto_table_genes
149+
#kt1 <- results$full$kallisto_table_genes
150+
setkey(kt1, "target_id")
151+
152+
kt1D6 <- kt1[grep("D6", kt1$sample)]
153+
kt1D6.wide <- tidyr::spread(kt1D6[, c("target_id", "sample", "tpm")], sample, tpm)
154+
kt1D6 <- kt1D6[, list("tpm" = mean(tpm)), by = list(condition, target_id)]
155+
s1 <- dt1[(dt1$MCF10A_WT.category == "Strong +1" & dt1$MCF10A_TGFb.category == "Strong -1")]$extGene
156+
s2 <- which(kt1D6.wide$target_id %in% s1)
157+
s1 <- which(kt1D6$target_id %in% s1)
158+
length(s1)
159+
160+
gplots::heatmap.2(scale(log2(as.matrix(subset(kt1D6.wide[s2], select = c("MCF10AD6_1",
161+
"MCF10AD6_2",
162+
#"MCF10AD6_3",
163+
"MCF10ATGFbD6_1",
164+
"MCF10ATGFbD6_2",
165+
"MCF10ATGFbD6_3"))) + 1)), trace = "none")
166+
m1 <- scale(log2(as.matrix(subset(kt1D6.wide[s2], select = c("MCF10AD6_1",
167+
"MCF10AD6_2",
168+
#"MCF10AD6_3",
169+
"MCF10ATGFbD6_1",
170+
"MCF10ATGFbD6_2",
171+
"MCF10ATGFbD6_3"))) + 1))
172+
m2 <- log2(as.matrix(subset(kt1D6.wide[s2], select = c("MCF10AD6_1",
173+
"MCF10AD6_2",
174+
#"MCF10AD6_3",
175+
"MCF10ATGFbD6_1",
176+
"MCF10ATGFbD6_2",
177+
"MCF10ATGFbD6_3"))) + 1)
178+
179+
rownames(m1) <- kt1D6.wide[s2]$target_id
180+
rownames(m2) <- kt1D6.wide[s2]$target_id
181+
182+
183+
head(m1)
184+
heatmaply::heatmaply(m1,#[sd1 > 0.3,],
185+
trace = "none",
186+
scale = "none",
187+
protocol = "ggplotly")
188+
library(vegan)
189+
km1 <- vegan::cascadeKM(m2, 2, 11, criterion = "calinski")
190+
plot(km1)
191+
data.table(km1$partition)
192+
g1 <- km1$partition[,5]
193+
names(g1) <- rownames(m2)
194+
g1
195+
setkey(kt1D6, "target_id")
196+
w1 <-which(kt1D6$target_id %in% names(g1))
197+
kt1D6.g1 <- kt1D6[w1]
198+
kmeansClusters <- data.table("extGene" = names(g1), "group" = g1)
199+
kt1D6.g1 <- merge(kt1D6.g1, kmeansClusters, by.x = "target_id", by.y = "extGene", all.y = T)
200+
201+
bp2 <- ggplot(data = kt1D6.g1,
202+
aes(x= condition, y = log2(tpm + 1), colour = condition)) +
203+
geom_boxplot() +
204+
facet_wrap(~group)
205+
bp2
206+
207+
vp2 <- ggplot(data = kt1D6.g1,
208+
aes(x= condition, y = log2(tpm + 1), colour = condition)) +
209+
geom_violin() +
210+
facet_wrap(~group) +
211+
stat_summary(fun.y=median.quartile,geom='point')
212+
vp2
213+
214+
h2 <- ggplot(data = kt1D6.g1,
215+
aes(log2(tpm + 1), fill = condition)) + geom_histogram(bins = 30, alpha = 0.5,)
216+
h2
217+
218+
f2 <- ggplot(data = kt1D6.g1,
219+
aes(log2(tpm + 1), colour = condition)) + geom_freqpoly(bins = 30, alpha = 0.5)
220+
f2
221+
222+
median(log2(kt1D6[s1][condition == "MCF10A_wt"]$tpm + 1))
223+
median(log2(kt1D6[s1][condition == "MCF10A_TGFb"]$tpm + 1))
224+
225+
median(log2(kt1D6[condition == "MCF10A_wt"]$tpm + 1))
226+
median(log2(kt1D6[condition == "MCF10A_TGFb"]$tpm + 1))
227+
228+
229+
# EMT genes-specific parallel plots ---------------------------------------
230+
ggparallel::ggparallel(list("MCF10A_WT.category", "MCF10A_shZ.category"), #, , "MCF10CA1A.category"),
231+
data = dt1[unique(ucscTranscriptsSigEMTCells[epi_mes == "epi"]$target_id)],
232+
label.size = 4,
233+
text.angle = 0, label.colour= "black", same.level = F) + ggtitle("shZ - EPI")
234+
"ParallelPlot_WT-shZ_EPI_Genes.pdf"
235+
236+
237+
ggparallel::ggparallel(list("MCF10A_WT.category", "MCF10A_shZ.category"), #, , "MCF10CA1A.category"),
238+
data = dt1[unique(ucscTranscriptsSigEMTCells[epi_mes == "mes"]$target_id)],
239+
label.size = 4,
240+
text.angle = 0, label.colour= "black", same.level = F) + ggtitle("shZ - MES")
241+
"ParallelPlot_WT-shZ_MES_Genes.pdf"
242+
243+
#----- TGFb
244+
ggparallel::ggparallel(list("MCF10A_WT.category", "MCF10A_TGFb.category"), #, , "MCF10CA1A.category"),
245+
data = dt1[unique(ucscTranscriptsSigEMTCells$target_id)],
246+
label.size = 4,
247+
text.angle = 0, label.colour= "black", same.level = F)
248+
249+
ggparallel::ggparallel(list("MCF10A_WT.category", "MCF10A_TGFb.category"), #, , "MCF10CA1A.category"),
250+
data = dt1[unique(ucscTranscriptsSigEMTCells[epi_mes == "epi"]$target_id)],
251+
label.size = 4,
252+
text.angle = 0, label.colour= "black", same.level = F) + ggtitle("TGFb - EPI")
253+
"ParallelPlot_WT-TGFb_EPI_Genes.pdf"
254+
255+
ggparallel::ggparallel(list("MCF10A_WT.category", "MCF10A_TGFb.category"), #, , "MCF10CA1A.category"),
256+
data = dt1[unique(ucscTranscriptsSigEMTCells[epi_mes == "mes"]$target_id)],
257+
label.size = 4,
258+
text.angle = 0, label.colour= "black", same.level = F) + ggtitle("TGFb - MES")
259+
"ParallelPlot_WT-TGFb_MES_Genes.pdf"
260+

0 commit comments

Comments
 (0)