Skip to content

Commit bfe7a06

Browse files
committed
Initial upload
1 parent 9f0dbd5 commit bfe7a06

File tree

4 files changed

+190
-0
lines changed

4 files changed

+190
-0
lines changed

R/similarity_ranking.R

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
# load ccle and tcga data
# Both tables are used with gene identifiers in their first column and one
# expression column per cell line (ccle) / tumor sample (tcga) after it.
ccle <- readRDS(file = 'ccle.rds')
tcga <- readRDS(file = 'tcga.rds')

# the selected gene signatures (100/200/300) include up-regulated and down-regulated genes
# m_genenumber is the number (50/100/150) of up-regulated genes and of
# down-regulated genes, i.e. half the size of the selected gene signature
m_genenumber <- 50

# Number of leading tcga samples to score. The script has always processed the
# first two samples only; set this to ncol(tcga) - 1 to score every sample
# (the later GPS step indexes the result by patient id, so every patient it
# needs must have been scored here).
m_samplenumber <- 2

ranking_res <- list()

# Return the gene identifiers of one expression column, sorted from the
# highest to the lowest expression value; genes whose value is NA after
# numeric conversion are dropped.
rank_genes_desc <- function(genes, values) {
  values <- as.numeric(values)
  keep <- !is.na(values)
  genes <- genes[keep]
  genes[order(-values[keep])]
}

# KS-style running statistic of a query gene list against a ranked reference.
# For the j-th query gene (query order) with relative reference position p_j:
#   a = max_j(j / t - p_j),  b = max_j(p_j - (j - 1) / t)
# and the result is a when a > b, otherwise -b.
ks_statistic <- function(query_genes, ref_genes) {
  t <- length(query_genes)
  j <- seq_len(t)
  # match() gives the first position of each gene in the reference ranking
  rel_pos <- match(query_genes, ref_genes) / length(ref_genes)
  a <- max(j / t - rel_pos)
  b <- max(rel_pos - (j - 1) / t)
  if (a > b) a else -b
}

tcga_genes <- as.character(tcga[[1]])
ccle_genes <- as.character(ccle[[1]])
cell_lines <- colnames(ccle)[-1]

for (i in seq_len(m_samplenumber)) {
  m_sample <- colnames(tcga)[i + 1]
  # tumor genes from most to least expressed
  tumor_ranked <- rank_genes_desc(tcga_genes, tcga[[i + 1]])

  scores <- numeric(length(cell_lines))
  for (k in seq_along(cell_lines)) {
    ref_ranked <- rank_genes_desc(ccle_genes, ccle[[k + 1]])

    # The signature is rebuilt for every cell line from the genes that are
    # measured in both profiles, so an adjustment made for one cell line
    # never carries over to the next one.
    shared <- tumor_ranked[tumor_ranked %in% ref_ranked]
    if (length(shared) < m_genenumber) {
      stop("fewer than ", m_genenumber, " genes shared between sample '",
           m_sample, "' and cell line '", cell_lines[k], "'", call. = FALSE)
    }
    # top m_genenumber shared genes (highest expression first)
    up_genes <- shared[seq_len(m_genenumber)]
    # bottom m_genenumber shared genes, kept in descending-expression order
    down_genes <- shared[seq(length(shared) - m_genenumber + 1, length(shared))]

    scores[k] <- ks_statistic(up_genes, ref_ranked) -
      ks_statistic(down_genes, ref_ranked)
  }

  # scale positive scores by the largest score and negative scores by the
  # magnitude of the smallest one
  top_score <- max(scores)
  bottom_score <- min(scores)
  is_pos <- scores > 0
  is_neg <- scores < 0
  scores[is_pos] <- scores[is_pos] / top_score
  scores[is_neg] <- scores[is_neg] / (-bottom_score)

  # rank 1 = highest score; the rank replaces the score in the first column
  # and is also kept in the 'order' column
  cell_rank <- rank(-scores)
  res <- data.frame(cell_rank, order = cell_rank, row.names = cell_lines)
  colnames(res)[1] <- m_sample

  ranking_res[[i]] <- res
  names(ranking_res)[i] <- m_sample
}
# clean results
# Combine the rank column (first column) of every scored sample into a single
# table: one row per cell line, one column per sample, named after the sample.
# drop = FALSE keeps each piece a data frame, so this also works when only one
# sample was scored (2:length(x) would count down and index out of bounds).
rank_columns <- lapply(ranking_res, function(x) x[, 1, drop = FALSE])
res <- do.call(cbind, unname(rank_columns))
colnames(res) <- names(ranking_res)
# select top 1/3/5 (m_cellnumber) candidate cell lines for each patient
# calculate the ranking score for each cell line towards each tumor subtype

m_cellnumber <- 5
res_gps <- list()

# load ccle and tcga cln data
# NOTE(review): both paths are anchored at the filesystem root; the files
# presumably live in the project's data directory - confirm and adjust.
ccle_cln <- readRDS(file = '/ccle_cln.rds')
tcga_cln <- readRDS(file = '/tcga_cln.rds')

# Distinct (tumor, subtype) pairs for each of the three subtype annotations.
# Entries equal to the string 'NA' are excluded; which() additionally drops
# rows where the comparison itself is NA.
tumor_sub1 <- unique(tcga_cln[which(tcga_cln$subtype_hist != 'NA'), c("tumor", "subtype_hist")])
tumor_sub2 <- unique(tcga_cln[which(tcga_cln$subtype_mol1 != 'NA'), c("tumor", "subtype_mol1")])
tumor_sub3 <- unique(tcga_cln[which(tcga_cln$subtype_mol2 != 'NA'), c("tumor", "subtype_mol2")])
colnames(tumor_sub1)[2] <- 'subtype'
colnames(tumor_sub2)[2] <- 'subtype'
colnames(tumor_sub3)[2] <- 'subtype'
tumor_sub <- rbind(tumor_sub1, tumor_sub2, tumor_sub3)
rownames(tumor_sub) <- seq_len(nrow(tumor_sub))

# Annotation column each row of tumor_sub came from. This replaces the fixed
# index ranges 1:39 / 40:43 / 44 with the actual row counts of the three
# parts, so the loop keeps working when the clinical table changes.
subtype_source <- rep(
  c("subtype_hist", "subtype_mol1", "subtype_mol2"),
  times = c(nrow(tumor_sub1), nrow(tumor_sub2), nrow(tumor_sub3))
)

for (i in seq_len(nrow(tumor_sub))) {
  # patients of this tumor type and subtype
  is_patient <- tcga_cln$tumor == tumor_sub$tumor[i] &
    tcga_cln[[subtype_source[i]]] == tumor_sub$subtype[i]
  tcga_cln1 <- tcga_cln[which(is_patient), ]
  # cell lines annotated with the same tumor type
  ccle_cln1 <- ccle_cln[which(ccle_cln$TUMOR_TYPE == tumor_sub$tumor[i]), ]
  # drop = FALSE keeps a data frame even when only one patient matches
  res1 <- res[ccle_cln1$ID, tcga_cln1$patient_id, drop = FALSE]

  # For every patient take the m_cellnumber cell lines with the smallest rank
  # value. res1 is re-sorted cumulatively, so ties in one column keep the
  # row order left by the previous column's sort.
  top_cells <- vector("list", ncol(res1))
  for (j in seq_len(ncol(res1))) {
    res1 <- res1[order(res1[, j]), , drop = FALSE]
    top_cells[[j]] <- rownames(res1)[seq_len(min(m_cellnumber, nrow(res1)))]
  }
  candidate_cells <- unlist(top_cells)

  # how often each cell line was selected, scaled by the highest count
  candidate_cells <- as.data.frame(table(candidate_cells), stringsAsFactors = FALSE)
  candidate_cells$Freq <- candidate_cells$Freq / max(candidate_cells$Freq)
  candidate_cells$rank <- rank(-candidate_cells$Freq)
  candidate_cells$tumor <- tumor_sub$tumor[i]
  candidate_cells$subtype <- tumor_sub$subtype[i]
  colnames(candidate_cells)[2] <- 'gps_score'
  candidate_cells <- candidate_cells[, c("tumor", "subtype", "candidate_cells", "gps_score", "rank")]

  res_gps[[i]] <- candidate_cells
  names(res_gps)[i] <- paste(tumor_sub$tumor[i], tumor_sub$subtype[i], sep = '_')
}
178+
179+
180+
181+
182+
183+
184+
185+
186+
187+
188+
189+
190+

data/ccle_cln.rds

9.5 KB
Binary file not shown.

data/tcga_cln.rds

59.4 KB
Binary file not shown.

gps/ccl_cgps.rds

24.7 KB
Binary file not shown.

0 commit comments

Comments
 (0)