Skip to content

Commit 78b29e1

Browse files
authored
Merge pull request #17 from nih-cfde/feat/cfde-opp-nums
add cfde-opportunity-numbers fetcher
2 parents 2e6d719 + f16872b commit 78b29e1

File tree

5 files changed

+96
-2
lines changed

5 files changed

+96
-2
lines changed

DESCRIPTION

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Description:
1313
License: MIT + file LICENSE
1414
Encoding: UTF-8
1515
Roxygen: list(markdown = TRUE)
16-
RoxygenNote: 7.3.2
16+
RoxygenNote: 7.3.3
1717
Depends:
1818
R (>= 4.1.0)
1919
Imports:
@@ -27,7 +27,8 @@ Imports:
2727
stringr,
2828
tibble,
2929
glue,
30-
ghql
30+
ghql,
31+
rvest
3132
Suggests:
3233
gargle,
3334
gitcreds,

NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export(cfde_opportunity_numbers)
34
export(epmc_search)
45
export(ga_dataframe)
56
export(ga_meta_simple)
@@ -46,6 +47,10 @@ importFrom(rlang,.data)
4647
importFrom(rlang,abort)
4748
importFrom(rlang,format_error_bullets)
4849
importFrom(rlang,inform)
50+
importFrom(rvest,html_attr)
51+
importFrom(rvest,html_nodes)
52+
importFrom(rvest,read_html)
53+
importFrom(stats,na.omit)
4954
importFrom(stats,setNames)
5055
importFrom(stringr,regex)
5156
importFrom(stringr,str_detect)

R/cfde_opportunity_numbers.R

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
#' All CFDE Funding Opportunity Numbers
#'
#' Retrieves the distinct CFDE funding opportunity numbers that appear in
#' the link targets of the CFDE funding website,
#' \url{https://commonfund.nih.gov/dataecosystem/FundingOpportunities}.
#'
#' The page is downloaded, the `href` attribute of every `<a>` element is
#' read, and the first opportunity number found in each link is kept. An
#' opportunity number is recognised as one of the prefixes `RFA`, `OTA` or
#' `NOT`, followed by a hyphen-separated two-letter upper-case code, two
#' digits and three digits. Links without a matching number are ignored.
#'
#' Note that this function is specific to the CFDE
#' program and is not a general-purpose web scraping
#' function.
#'
#' @importFrom rvest read_html html_nodes html_attr
#' @importFrom stats na.omit
#'
#' @param url The URL of the CFDE funding webpage.
#' @return A character vector of unique funding opportunity numbers, in
#'   order of first appearance on the page. The vector has length zero when
#'   no link on the page contains an opportunity number.
#'
#' @examples
#' \dontrun{
#' # Both calls need internet access, so they are not run during checks.
#' browseURL("https://commonfund.nih.gov/dataecosystem/FundingOpportunities")
#'
#' cfde_opportunity_numbers()
#' }
#'
#' @export
cfde_opportunity_numbers <- function(
  url = "https://commonfund.nih.gov/dataecosystem/FundingOpportunities"
) {
  hrefs <- rvest::read_html(url) |>
    rvest::html_nodes("a") |>
    rvest::html_attr("href")

  # Anchors without an href attribute come back as NA; drop them up front.
  hrefs <- hrefs[!is.na(hrefs)]

  pattern <- "(RFA|OTA|NOT)-[A-Z]{2}-\\d{2}-\\d{3}"

  # regmatches() + regexpr() keeps only the elements that matched and
  # returns the first match of each, so no NA or empty strings can remain.
  matches <- regmatches(hrefs, regexpr(pattern, hrefs, perl = TRUE))

  # The same opportunity is often linked more than once on the page.
  unique(matches)
}

man/cfde_opportunity_numbers.Rd

Lines changed: 35 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
test_that("cfde_opportunity_numbers returns a character vector", {
  # This test scrapes the live CFDE funding page, so it is skipped on CRAN
  # and whenever the host cannot be reached.
  skip_on_cran()
  skip_if_offline(host = "commonfund.nih.gov")

  result <- cfde_opportunity_numbers()
  expect_type(result, "character")
})
6+
test_that("cfde_opportunity_numbers returns expected pattern", {
  # This test scrapes the live CFDE funding page, so it is skipped on CRAN
  # and whenever the host cannot be reached.
  skip_on_cran()
  skip_if_offline(host = "commonfund.nih.gov")

  result <- cfde_opportunity_numbers()

  # Anchored so that each returned value must be exactly one opportunity
  # number, not merely contain one.
  pattern <- "^(RFA|OTA|NOT)-[A-Z]{2}-\\d{2}-\\d{3}$"
  expect_true(all(grepl(pattern, result)))
})

0 commit comments

Comments
 (0)