Skip to content

Commit b076843

Browse files
committed
update
1 parent 8462c9e commit b076843

File tree

3 files changed

+82
-1
lines changed

3 files changed

+82
-1
lines changed

data/mouse_uniprot.rds

427 KB
Binary file not shown.

human_data_processing.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ human_uniprot<- readRDS('data/human_uniprot.rds')
247247
# generating keywords for the API search term
248248
human_ppi1$search_term<- 'NA'
249249

250-
for (i in 1:10) {
250+
for (i in 1:nrow(human_ppi1)) {
251251
print(i)
252252
gene1<- human_ppi1$ligand_gene_symbol[i]
253253
gene1_name<- unique(human_gene_info[human_gene_info$Symbol == gene1,]$Synonyms)

mouse_data_processing.R

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,88 @@ colnames(mouse_ppi1)<- c('ligand','receptor',
221221
# load uniprot protein knowledgebase
222222
mouse_uniprot<- readRDS('data/mouse_uniprot.rds')
223223

224+
# Generate the PubMed search term (URL-encoded query) for each LR pair.
mouse_ppi1$search_term <- 'NA'

# Build one OR-ed query clause for a single gene symbol.
#
# Collects the symbol itself, any protein names listed for it in `uniprot`,
# and any synonyms listed for it in `gene_info`, de-duplicates them, tags each
# with the URL-encoded "[Title/Abstract]" field, joins them with "+OR+", and
# wraps the result in URL-encoded parentheses (%28 ... %29).
#
# symbol    : a single gene symbol.
# gene_info : data frame with `Symbol` and `Synonyms` columns.
# uniprot   : data frame with `gene` and `protein` columns.
# Returns a single character string.
build_gene_clause <- function(symbol, gene_info, uniprot) {
  synonyms <- unique(gene_info[gene_info$Symbol == symbol, ]$Synonyms)
  # Drop '-' entries (presumably the gene_info marker for "no synonym" --
  # confirm against the data); which() also discards NA comparisons.
  synonyms <- synonyms[which(synonyms != '-')]

  terms <- symbol
  if (symbol %in% uniprot$gene) {
    terms <- c(terms, unique(uniprot[uniprot$gene == symbol, ]$protein))
  }
  terms <- unique(c(terms, synonyms))

  # %5BTitle%2FAbstract%5D is the URL-encoded form of "[Title/Abstract]".
  tagged <- paste0(terms, '%5BTitle%2FAbstract%5D')
  paste0('%28', paste(tagged, collapse = '+OR+'), '%29')
}

# seq_len() keeps the loop from running when mouse_ppi1 has zero rows
# (1:nrow() would iterate over c(1, 0)).
for (i in seq_len(nrow(mouse_ppi1))) {
  print(i)

  ligand_clause <- build_gene_clause(
    mouse_ppi1$ligand_gene_symbol[i], mouse_gene_info, mouse_uniprot
  )
  receptor_clause <- build_gene_clause(
    mouse_ppi1$receptor_gene_symbol[i], mouse_gene_info, mouse_uniprot
  )

  # Final query: (ligand names) AND (receptor names).
  mouse_ppi1[i, "search_term"] <- paste(
    ligand_clause, 'AND', receptor_clause,
    sep = '+'
  )
}
282+
283+
284+
# Exclude LR pairs without matched articles, using PubMed E-utilities.

# Warning: please read the NCBI E-utilities usage rules carefully before
# running the code below.

# Counts are stored as character strings here; '-1' marks "not queried yet".
mouse_ppi1$count <- '-1'

esearch_base <- 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='

# Iterate over the rows of mouse_ppi1 itself (the loop bound previously used
# nrow(human_ppi1), which is a different table).
for (i in seq_len(nrow(mouse_ppi1))) {
  print(i)
  # API key is removed
  d1_url <- paste0(esearch_base, mouse_ppi1$search_term[i])
  d1_res <- getURL(url = d1_url)

  # Extract the text between the first <Count> and the first </Count> in the
  # response; str_locate() returns c(start, end) of the first match.
  count_open <- str_locate(string = d1_res, pattern = '<Count>')
  count_close <- str_locate(string = d1_res, pattern = '</Count>')
  mouse_ppi1$count[i] <- str_sub(
    string = d1_res,
    start = count_open[2] + 1,
    end = count_close[1] - 1
  )
  # Sys.sleep is removed
}
225302

303+
# Remove LR pairs without matched articles
mouse_ppi1$count <- as.numeric(mouse_ppi1$count)
# which() keeps only rows whose count is known and positive. A bare logical
# index would turn every NA count (e.g. a response with no <Count> element)
# into an all-NA row in the result.
mouse_ppi1 <- mouse_ppi1[which(mouse_ppi1$count > 0), ]
227307

308+
# obtain 222,222 potential LR pairs for manual verification.

0 commit comments

Comments
 (0)