77# ' @return data.frame of the accessions, taxIds, and taxonomy
88# ' @export
99
10- get_taxonomy <- function (accessions , api_key = Sys.getenv( " NCBI_API_KEY " ) ) {
10+ get_taxonomy <- function (accessions ) {
1111 accessions <- unique(as.character(accessions ))
12- taxids <- accession2taxid(accessions , api_key )
12+ taxids <- accession2taxid(accessions )
1313
1414 taxonomy <- fetch_taxonomy(unique(taxids ))
1515 merge(
@@ -27,52 +27,36 @@ get_taxonomy <- function(accessions, api_key = Sys.getenv("NCBI_API_KEY")) {
2727# ' @return named vector of taxIds.
2828# ' @export
2929
accession2taxid <- function(accessions) {
  # Sends all accessions in a single POST to the NCBI E-utilities ESummary
  # endpoint (db = "nuccore"), parses the XML reply, and applies
  # `parse_docsum()` to every `DocSum` node found in it.
  endpoint <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

  # The ids travel as one comma-separated form field.
  id_field <- paste0(accessions, collapse = ",")

  req <- httr2::request(endpoint)
  req <- httr2::req_method(req, "POST")
  req <- httr2::req_body_form(req, db = "nuccore", id = id_field)
  req <- httr2::req_retry(req, max_tries = 5)

  resp <- httr2::req_perform(req)
  doc <- XML::xmlParse(httr2::resp_body_string(resp))

  # One result per DocSum node; the result shape is decided by
  # `parse_docsum()`.
  XML::xpathSApply(doc, "//DocSum", parse_docsum)
}
73- parse_LinkSet <- function (LinkSet ) {
74- gid <- XML :: xpathSApply(LinkSet , " .//IdList/Id" , xmlValue )
75- taxid <- XML :: xpathSApply(LinkSet , " .//LinkSetDb/Link/Id" , xmlValue )
51+ parse_docsum <- function (docsum ) {
52+ gid <- XML :: xpathSApply(
53+ docsum ,
54+ " .//Item[@Name='AccessionVersion']" , XML :: xmlValue
55+ )
56+ taxid <- XML :: xpathSApply(
57+ docsum ,
58+ " .//Item[@Name='TaxId']" , XML :: xmlValue
59+ )
7660 if (length(taxid ) != 1 ) {
7761 taxid <- NA
7862 }
0 commit comments