Skip to content

Commit ada378c

Browse files
committed
Updated accession2taxid function to use esummary instead of elink
This fixes a curl error that was happening
1 parent a8e04c5 commit ada378c

File tree

2 files changed

+27
-42
lines changed

2 files changed

+27
-42
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Imports:
2525
ggplot2,
2626
grid,
2727
httr,
28+
httr2,
2829
lubridate,
2930
plyr,
3031
reshape2,

R/taxonomy.R

Lines changed: 26 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
#' @return data.frame of the accessions, taxIds, and taxonomy
88
#' @export
99

10-
get_taxonomy <- function(accessions, api_key = Sys.getenv("NCBI_API_KEY")) {
10+
get_taxonomy <- function(accessions) {
1111
accessions <- unique(as.character(accessions))
12-
taxids <- accession2taxid(accessions, api_key)
12+
taxids <- accession2taxid(accessions)
1313

1414
taxonomy <- fetch_taxonomy(unique(taxids))
1515
merge(
@@ -27,52 +27,36 @@ get_taxonomy <- function(accessions, api_key = Sys.getenv("NCBI_API_KEY")) {
2727
#' @return named vector of taxIds.
2828
#' @export
2929

30-
accession2taxid <- function(accessions, api_key = Sys.getenv("NCBI_API_KEY")) {
31-
url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi"
32-
33-
names(accessions) <- rep("id", times = length(accessions))
34-
# query <- c(
35-
# list(db = "taxonomy", dbfrom = "nuccore", idtype = "acc"),
36-
# accessions[41:50]
37-
# )
38-
39-
request_base <-
40-
httr2::request(url) |>
41-
httr2::req_method("POST")
42-
43-
acc_chunks <- split(accessions, ceiling(seq_along(accessions) / 100))
44-
# this keeps the names from populating taxids
45-
names(acc_chunks) <- NULL
30+
accession2taxid <- function(accessions) {
31+
url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
4632

4733
taxids <-
48-
lapply(
49-
acc_chunks,
50-
function(acc) {
51-
request_base |>
52-
httr2::req_body_form(
53-
db = "taxonomy",
54-
dbfrom = "nuccore",
55-
idtype = "acc",
56-
id = acc,
57-
.multi = "explode"
58-
) |>
59-
httr2::req_retry(max_tries = 5) |>
60-
httr2::req_perform() |>
61-
httr2::resp_body_string() |>
62-
XML::xmlParse() |>
63-
XML::xpathSApply(
64-
"//LinkSet",
65-
parse_LinkSet
66-
)
67-
}
34+
httr2::request(url) |>
35+
httr2::req_method("POST") |>
36+
httr2::req_body_form(
37+
db = "nuccore",
38+
id = paste0(accessions, collapse = ",")
6839
) |>
69-
unlist()
40+
httr2::req_retry(max_tries = 5) |>
41+
httr2::req_perform() |>
42+
httr2::resp_body_string() |>
43+
XML::xmlParse() |>
44+
XML::xpathSApply(
45+
"//DocSum",
46+
parse_docsum
47+
)
7048

7149
taxids
7250
}
73-
parse_LinkSet <- function(LinkSet) {
74-
gid <- XML::xpathSApply(LinkSet, ".//IdList/Id", xmlValue)
75-
taxid <- XML::xpathSApply(LinkSet, ".//LinkSetDb/Link/Id", xmlValue)
51+
parse_docsum <- function(docsum) {
52+
gid <- XML::xpathSApply(
53+
docsum,
54+
".//Item[@Name='AccessionVersion']", XML::xmlValue
55+
)
56+
taxid <- XML::xpathSApply(
57+
docsum,
58+
".//Item[@Name='TaxId']", XML::xmlValue
59+
)
7660
if (length(taxid) != 1) {
7761
taxid <- NA
7862
}

0 commit comments

Comments
 (0)