|
# Read the TCGA and CCLE data tables from the working directory.
tcga <- readRDS(file = 'tcga.rds')
ccle <- readRDS(file = 'ccle.rds')

# A gene signature of 100/200/300 genes is made of equal numbers of
# up-regulated and down-regulated genes. m_genenumber is the size of each
# half (50/100/150), i.e. half of the total signature size.
m_genenumber <- 50

# Per-sample ranking tables are collected here, one entry per tumor sample.
ranking_res <- vector("list", 0)
| 10 | + |
# Kolmogorov-Smirnov-style enrichment statistic of a gene set within a ranked
# reference list.
#
# positions: 1-based positions of the signature genes in the reference
#            ranking, given in signature order.
# n_ref:     number of genes in the reference ranking.
#
# Returns the larger of the two running deviations, positive when the first
# deviation wins and negated otherwise. Returns NA when the signature is empty
# or any position is missing.
#
# NOTE(review): positions are consumed in signature order, exactly as the
# original per-gene loop did. The Connectivity Map formulation of this
# statistic appears to sort the positions ascending first -- confirm which
# behaviour is intended before changing it.
ks_enrichment_score <- function(positions, n_ref) {
  n_sig <- length(positions)
  if (n_sig == 0L || anyNA(positions)) {
    return(NA_real_)
  }
  step <- seq_len(n_sig)
  a <- max(step / n_sig - positions / n_ref)
  b <- max(positions / n_ref - (step - 1) / n_sig)
  if (a > b) a else -b
}

# The first column of both tables holds the gene identifier; every other
# column is one cell line (ccle) or one tumor sample (tcga).
cell_lines <- colnames(ccle)[-1]
n_cell_lines <- length(cell_lines)
ccle_genes <- ccle[[1]]
tcga_genes <- tcga[[1]]

# NOTE(review): only the first two tumor samples are scored, as in the
# original script. Raise this to ncol(tcga) - 1 to score every sample.
n_samples <- min(2, ncol(tcga) - 1)

for (i in seq_len(n_samples)) {
  m_sample <- colnames(tcga)[i + 1]

  # Tumor genes ordered from highest to lowest value, missing values dropped.
  tumor_expr <- as.numeric(tcga[[i + 1]])
  tumor_measured <- !is.na(tumor_expr)
  tumor_ranked <- tcga_genes[tumor_measured][order(-tumor_expr[tumor_measured])]

  # Cell lines that cannot be scored keep NA.
  scores <- rep(NA_real_, n_cell_lines)

  for (k in seq_len(n_cell_lines)) {
    # Reference genes of this cell line, ordered from highest to lowest value.
    ref_expr <- as.numeric(ccle[[k + 1]])
    ref_measured <- !is.na(ref_expr)
    ref_ranked <- ccle_genes[ref_measured][order(-ref_expr[ref_measured])]
    n_ref <- length(ref_ranked)

    # The signature is rebuilt from the full tumor ranking for every cell
    # line, so a substitution made for one cell line never leaks into the
    # next. Genes absent from the reference are skipped and replaced by the
    # next-ranked tumor gene.
    shared <- tumor_ranked[tumor_ranked %in% ref_ranked]
    if (length(shared) < m_genenumber) {
      next
    }
    up_genes <- head(shared, m_genenumber)
    down_genes <- tail(shared, m_genenumber)

    # match() returns the first occurrence, i.e. the rank in the reference.
    ks1 <- ks_enrichment_score(match(up_genes, ref_ranked), n_ref)
    ks2 <- ks_enrichment_score(match(down_genes, ref_ranked), n_ref)
    scores[k] <- ks1 - ks2
  }

  # Scale positive scores by the largest score and negative scores by the
  # magnitude of the smallest one. which() ignores NA scores.
  positive <- which(scores > 0)
  negative <- which(scores < 0)
  if (length(positive) > 0) {
    scores[positive] <- scores[positive] / max(scores[positive])
  }
  if (length(negative) > 0) {
    scores[negative] <- scores[negative] / (-min(scores[negative]))
  }

  # Rank 1 is the highest-scoring cell line; NA scores keep an NA rank.
  ranks <- rank(-scores)

  # Both columns hold the rank: the first is named after the sample, the
  # second is named `order`.
  res <- data.frame(
    ranks,
    order = ranks,
    row.names = cell_lines,
    check.names = FALSE
  )
  colnames(res)[1] <- m_sample

  ranking_res[[i]] <- res
  names(ranking_res)[i] <- m_sample
}
| 84 | + |
# clean results
# Gather the first (rank) column of every per-sample table in ranking_res
# into a single data frame: one row per row of the per-sample tables, one
# column per entry of ranking_res, named after that entry.
# Building all columns at once also works when only one sample was scored,
# where `2:length(ranking_res)` would have iterated backwards.
stopifnot(length(ranking_res) >= 1)
rank_columns <- lapply(ranking_res, function(sample_res) sample_res[[1]])
res <- data.frame(
  do.call(cbind, rank_columns),
  row.names = rownames(ranking_res[[1]]),
  check.names = FALSE
)
colnames(res) <- names(ranking_res)
| 93 | + |
# Select the top m_cellnumber (1/3/5) candidate cell lines for each patient,
# then score each cell line against every tumor subtype by how often it was
# selected among that subtype's patients.

m_cellnumber <- 5

# load ccle and tcga cln data
ccle_cln <- readRDS(file = '/ccle_cln.rds')
tcga_cln <- readRDS(file = '/tcga_cln.rds')

# Distinct (tumor, subtype) pairs found in one subtype column of `cln`.
# Rows whose label is missing or the literal string 'NA' are ignored.
distinct_subtypes <- function(cln, subtype_column) {
  labels <- cln[[subtype_column]]
  labelled <- !is.na(labels) & labels != 'NA'
  pairs <- unique(cln[labelled, c("tumor", subtype_column)])
  colnames(pairs)[2] <- 'subtype'
  pairs
}

# Count how often each cell line is among the n_top best-ranked rows of a
# patient column in rank_table (rows = cell lines, columns = patients, lower
# value = better rank). Returns a data frame with columns candidate_cells,
# gps_score (count divided by the largest count) and rank (1 = most frequent).
score_candidates <- function(rank_table, n_top) {
  picks <- vector("list", ncol(rank_table))
  for (j in seq_len(ncol(rank_table))) {
    # The table is re-sorted cumulatively, as in the original loop, so ties
    # are broken by the ordering left by the previous patient.
    rank_table <- rank_table[order(rank_table[, j]), , drop = FALSE]
    # head() returns fewer names instead of NA when rows are scarce.
    picks[[j]] <- head(rownames(rank_table), n_top)
  }
  counts <- as.data.frame(
    table(candidate_cells = unlist(picks)),
    stringsAsFactors = FALSE
  )
  gps_score <- counts$Freq / max(counts$Freq)
  data.frame(
    candidate_cells = counts$candidate_cells,
    gps_score = gps_score,
    rank = rank(-gps_score),
    stringsAsFactors = FALSE
  )
}

tumor_sub1 <- distinct_subtypes(tcga_cln, "subtype_hist")
tumor_sub2 <- distinct_subtypes(tcga_cln, "subtype_mol1")
tumor_sub3 <- distinct_subtypes(tcga_cln, "subtype_mol2")
tumor_sub <- rbind(tumor_sub1, tumor_sub2, tumor_sub3)
rownames(tumor_sub) <- seq_len(nrow(tumor_sub))

# Subtype column each row of tumor_sub came from. This replaces the fixed
# row ranges (1:39, 40:43, 44) with boundaries derived from the data.
subtype_source <- rep(
  c("subtype_hist", "subtype_mol1", "subtype_mol2"),
  times = c(nrow(tumor_sub1), nrow(tumor_sub2), nrow(tumor_sub3))
)

# One slot per row of tumor_sub; a slot stays NULL when nothing can be scored.
res_gps <- vector("list", nrow(tumor_sub))
names(res_gps) <- paste(tumor_sub$tumor, tumor_sub$subtype, sep = '_')

for (i in seq_len(nrow(tumor_sub))) {
  # which() drops comparisons that evaluate to NA.
  in_subtype <- which(
    tcga_cln$tumor == tumor_sub$tumor[i] &
      tcga_cln[[subtype_source[i]]] == tumor_sub$subtype[i]
  )
  tcga_cln1 <- tcga_cln[in_subtype, ]
  ccle_cln1 <- ccle_cln[which(ccle_cln$TUMOR_TYPE == tumor_sub$tumor[i]), ]

  # Keep only cell lines and patients that actually appear in `res`.
  cell_ids <- as.character(ccle_cln1$ID)
  cell_ids <- cell_ids[cell_ids %in% rownames(res)]
  patient_ids <- as.character(tcga_cln1$patient_id)
  patient_ids <- patient_ids[patient_ids %in% colnames(res)]

  if (length(cell_ids) == 0 || length(patient_ids) == 0) {
    warning(
      "No ranked cell lines or patients for ", names(res_gps)[i],
      "; subtype skipped.",
      call. = FALSE
    )
    next
  }

  # drop = FALSE keeps a data frame even for a single patient or cell line.
  res1 <- res[cell_ids, patient_ids, drop = FALSE]
  candidate_cells <- score_candidates(res1, m_cellnumber)

  # Final column order: tumor, subtype, candidate_cells, gps_score, rank.
  res_gps[[i]] <- data.frame(
    tumor = tumor_sub$tumor[i],
    subtype = tumor_sub$subtype[i],
    candidate_cells,
    stringsAsFactors = FALSE
  )
}
| 178 | + |
| 179 | + |
| 180 | + |
| 181 | + |
| 182 | + |
| 183 | + |
| 184 | + |
| 185 | + |
| 186 | + |
| 187 | + |
| 188 | + |
| 189 | + |
| 190 | + |
0 commit comments