@@ -221,7 +221,88 @@ colnames(mouse_ppi1)<- c('ligand','receptor',
# load uniprot protein knowledgebase
mouse_uniprot <- readRDS('data/mouse_uniprot.rds')

# Generate the keyword search term (PubMed E-utilities `term=` value) for each
# ligand-receptor pair. The term has the form
#   (L1[Title/Abstract] OR L2[Title/Abstract] ...) AND (R1[Title/Abstract] OR ...)
# written with the percent-encoded brackets/parentheses and `+` as separator.

# Build the parenthesised OR-clause for one gene symbol.
#
# symbol    : a single gene symbol.
# gene_info : data frame with columns `Symbol` and `Synonyms`; a Synonyms
#             value of '-' means "no synonym" and is skipped.
# uniprot   : data frame with columns `gene` and `protein`.
#
# Returns one string: '%28' + terms joined by '+OR+' + '%29', where each term
# is suffixed with '%5BTitle%2FAbstract%5D'. Term order is: the symbol itself,
# then UniProt protein names, then synonyms (duplicates removed).
# NOTE(review): terms are inserted verbatim; names containing spaces or '|'
# are not percent-encoded here -- confirm the API call tolerates that.
build_pubmed_clause <- function(symbol, gene_info, uniprot) {
  synonyms <- unique(gene_info[gene_info$Symbol == symbol, ]$Synonyms)
  # which() also discards NA entries
  synonyms <- synonyms[which(synonyms != '-')]

  terms <- symbol
  if (symbol %in% uniprot$gene) {
    terms <- c(terms, unique(uniprot[uniprot$gene == symbol, ]$protein))
  }
  terms <- unique(c(terms, synonyms))

  tagged <- paste0(terms, '%5BTitle%2FAbstract%5D')
  paste0('%28', paste(tagged, collapse = '+OR+'), '%29')
}

mouse_ppi1$search_term <- 'NA'

# seq_len() keeps the loop from running when the table has zero rows
for (i in seq_len(nrow(mouse_ppi1))) {
  print(i)
  ligand_clause <- build_pubmed_clause(
    mouse_ppi1$ligand_gene_symbol[i], mouse_gene_info, mouse_uniprot
  )
  receptor_clause <- build_pubmed_clause(
    mouse_ppi1$receptor_gene_symbol[i], mouse_gene_info, mouse_uniprot
  )

  # both genes must be mentioned: (ligand terms) AND (receptor terms)
  mouse_ppi1[i, 'search_term'] <- paste(
    ligand_clause, 'AND', receptor_clause, sep = '+'
  )
}

# Exclude LR pairs without matched articles with PubMed E-utilities

# Warning: please read the rules of NCBI E-utilities usage carefully before
# running the code below.

# '-1' is the sentinel for "not queried / no count returned"
mouse_ppi1$count <- '-1'

esearch_base <- 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='

# Iterate over the mouse table itself (the loop bound previously referenced
# human_ppi1); seq_len() is safe when the table is empty.
for (i in seq_len(nrow(mouse_ppi1))) {
  print(i)
  # API key is removed
  d1_url <- paste0(esearch_base, mouse_ppi1$search_term[i])
  d1_res <- getURL(url = d1_url)

  # Extract the text between the first <Count> and </Count> tags
  count_open <- str_locate(string = d1_res, pattern = '<Count>')
  count_close <- str_locate(string = d1_res, pattern = '</Count>')
  hit_count <- str_sub(
    string = d1_res,
    start = count_open[2] + 1,
    end = count_close[1] - 1
  )

  # Keep the '-1' sentinel when the response carries no <Count> element
  if (!is.na(hit_count)) {
    mouse_ppi1$count[i] <- hit_count
  }
  # Sys.sleep is removed
}

# Remove LR pairs without matched articles

mouse_ppi1$count <- as.numeric(mouse_ppi1$count)
# which() drops NA comparisons, so a count that could not be parsed removes
# the pair instead of producing an all-NA row in the result
mouse_ppi1 <- mouse_ppi1[which(mouse_ppi1$count > 0), ]

# obtain 222,222 potential LR pairs for manual verification.
0 commit comments