Skip to content

Commit 129db0e

Browse files
authored
Merge pull request #24 from nih-cfde/23-bespoke-function-to-create-excel-spreadsheet-of-results
Export Tabular Results
2 parents 78b29e1 + cd5b128 commit 129db0e

19 files changed

+339
-13
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ terraform/
1111
DEVELOPER.md
1212
^README\.Rmd$
1313
^CODE_OF_CONDUCT\.md$
14+
\.xlsx$
1415

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ docs
55
terraform/
66
/.quarto/
77
**/*.quarto_ipynb
8+
inst/secret/cfde-access-keyfile.json
9+
*.xlsx

DESCRIPTION

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
Package: programets
22
Title: Collect and analyze academic impact metrics from various sources
3-
Version: 0.3.0
3+
Version: 0.3.1
44
Authors@R:
5-
c(person("Sean", "Davis", , "[email protected]", role = c("aut", "cre"), comment=c(ORCID = "0000-0002-8991-6458")),
6-
person("David", "Mayer", , "[email protected]", role = c("aut")))
5+
c(
6+
person("Sean", "Davis", , "[email protected]", role = c("aut", "cre"), comment=c(ORCID = "0000-0002-8991-6458")),
7+
person("David", "Mayer", , "[email protected]", role = c("aut"), comment=c(ORCID = "0000-0002-6056-9771"))
8+
)
79
Description:
810
Collect and analyze academic impact metrics from various sources.
911
The package provides functions to collect and analyze data from NIH reporter,
@@ -28,7 +30,10 @@ Imports:
2830
tibble,
2931
glue,
3032
ghql,
31-
rvest
33+
openxlsx,
34+
tidyr,
35+
rvest,
36+
readr
3237
Suggests:
3338
gargle,
3439
gitcreds,

DEVELOPER.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ Step 3: Encrypt the google service account json file
3838

3939
```{r}
4040
gargle::secret_encrypt_json(
41-
path = "path/to/ga4-acess-keyfile.json",
41+
path = "path/to/ga4-access-keyfile.json",
4242
key = "GARGLE_ENCRYPTION_KEY",
43-
output = "inst/secret/ga4-acess-keyfile.json"
43+
output = "inst/secret/ga4-access-keyfile.json"
4444
)
4545
```
4646
This will create an encrypted version of the json file in the `inst/secret` directory.
@@ -51,7 +51,7 @@ Step 4: Use the encrypted file in your code
5151
library(gargle)
5252
googleAnalyticsR::ga_auth(
5353
json_file = gargle::secret_decrypt_json(
54-
path = system.file("secret/ga4-acess-keyfile.json", package = "programets"),
54+
path = system.file("secret/ga4-access-keyfile.json", package = "programets"),
5555
key = "GARGLE_ENCRYPTION_KEY"
5656
)
5757
)

NAMESPACE

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22

33
export(cfde_opportunity_numbers)
44
export(epmc_search)
5+
export(export_tabular)
56
export(ga_dataframe)
67
export(ga_meta_simple)
78
export(ga_query_explorer)
89
export(get_core_project_info)
10+
export(get_ga_basic)
11+
export(get_ga_meta_by_id)
912
export(get_github_by_topic)
1013
export(get_github_by_topic_graphql)
1114
export(icite)
@@ -23,6 +26,8 @@ importFrom(dplyr,tibble)
2326
importFrom(ghql,GraphqlClient)
2427
importFrom(glue,glue)
2528
importFrom(glue,glue_collapse)
29+
importFrom(googleAnalyticsR,ga_account_list)
30+
importFrom(googleAnalyticsR,ga_auth)
2631
importFrom(httr2,req_auth_bearer_token)
2732
importFrom(httr2,req_body_json)
2833
importFrom(httr2,req_error)
@@ -36,12 +41,18 @@ importFrom(httr2,request)
3641
importFrom(httr2,resp_body_json)
3742
importFrom(httr2,resp_status)
3843
importFrom(jsonlite,fromJSON)
44+
importFrom(openxlsx,addWorksheet)
45+
importFrom(openxlsx,createWorkbook)
46+
importFrom(openxlsx,saveWorkbook)
47+
importFrom(openxlsx,writeData)
3948
importFrom(purrr,discard)
49+
importFrom(purrr,map)
4050
importFrom(purrr,map2_dbl)
4151
importFrom(purrr,map_chr)
4252
importFrom(purrr,map_dbl)
4353
importFrom(purrr,map_dfr)
4454
importFrom(purrr,pmap)
55+
importFrom(readr,write_csv)
4556
importFrom(rlang,"%||%")
4657
importFrom(rlang,.data)
4758
importFrom(rlang,abort)
@@ -54,4 +65,6 @@ importFrom(stats,na.omit)
5465
importFrom(stats,setNames)
5566
importFrom(stringr,regex)
5667
importFrom(stringr,str_detect)
68+
importFrom(stringr,str_remove)
5769
importFrom(tibble,tibble)
70+
importFrom(tidyr,separate)

R/export.R

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#' Export to Tabular
#'
#' Collects NIH project information, publication metrics, GitHub repository
#' information and Google Analytics properties for a set of NIH Core Project
#' Numbers and writes them to a single Excel workbook with one worksheet per
#' table (`project_info`, `pub_info`, `github_info` and `ga_info`).
#'
#' The workbook is written to `dir` as `programets_<date>.xlsx`, where
#' `<date>` is `Sys.Date()`. When `csv = TRUE` each table is additionally
#' written to `dir` as `programets_<table>_<date>.csv`.
#'
#' @param core_project_numbers A character vector of NIH Core Project Numbers
#' @param token The token required for authentication with the GitHub API
#' @param service_account_json A character string containing the path to a
#'   JSON file containing a Google service account. Passed on to
#'   [get_ga_basic()].
#' @param dir A character string containing the path to an existing directory
#'   where the Excel file (and any CSV files) will be written
#' @param csv A logical indicating whether to also write each table to a CSV
#'   file
#'
#' @return The value of the final `openxlsx::saveWorkbook()` call. The
#'   function is called for its side effect of writing files to `dir`.
#'
#' @importFrom openxlsx createWorkbook addWorksheet writeData saveWorkbook
#' @importFrom readr write_csv
#' @importFrom rlang .data
#' @export
#'
#' @examples
#' \dontrun{
#' test_projects <- c("OT2OD030545")
#' export_tabular(test_projects, dir = tempdir(), csv = TRUE)
#' }
#'
export_tabular <- function(core_project_numbers,
                           token = gitcreds::gitcreds_get()$password,
                           service_account_json = "cfde-access-keyfile.json",
                           dir,
                           csv = FALSE) {
  ## Fail before any remote request is made if the output location is unusable
  if (missing(dir) || !is.character(dir) || length(dir) != 1L || !dir.exists(dir)) {
    rlang::abort("`dir` must be the path to an existing directory.")
  }

  ## Evaluate the date once so every file written by this call shares a stamp
  date_stamp <- Sys.Date()

  ## Create Excel Workbook
  wb <- createWorkbook()

  ## Write one table to its own worksheet and, when requested, to a CSV file.
  ## `wb` is modified in place by the openxlsx calls.
  add_table <- function(x, sheet, csv_stem) {
    addWorksheet(wb, sheet)
    writeData(wb = wb, sheet = sheet, x = x, na.string = "")
    if (csv) {
      csv_name <- paste0("programets_", csv_stem, "_", date_stamp, ".csv")
      write_csv(x, file.path(dir, csv_name))
    }
    invisible(x)
  }

  ## Add NIH Project Info
  proj_info <- get_core_project_info(core_project_numbers)
  add_table(proj_info, sheet = "project_info", csv_stem = "proj_info")

  ## Add Associated Publications
  pmids <- proj_info |>
    dplyr::filter(.data$found_publication) |>
    dplyr::pull("pmid")
  pub_info <- icite(pmids)
  add_table(pub_info, sheet = "pub_info", csv_stem = "pub_info")

  ## Add GitHub
  github_info <- get_github_by_topic_graphql(core_project_numbers, token = token)
  add_table(github_info, sheet = "github_info", csv_stem = "github_info")

  ## Add Google Analytics
  ga_info <- get_ga_basic(
    core_project_numbers = core_project_numbers,
    service_account_json = service_account_json
  )
  add_table(ga_info, sheet = "ga_info", csv_stem = "ga_info")

  ## Save Workbook
  saveWorkbook(wb, file.path(dir, paste0("programets_", date_stamp, ".xlsx")))
}

R/ga_meta_simple.R

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,37 @@
2626
#' @export
2727
ga_meta_simple <- function() {
  # Fetch the Data API metadata first, then convert it to a tibble
  ga_metadata <- googleAnalyticsR::ga_meta(version = "data")
  tibble::as_tibble(ga_metadata)
}
30+
31+
#' Google Analytics metadata for a single property
#'
#' Looks up the Google Analytics metadata for one property ID and returns
#' it as a tibble.
#'
#' This is a thin wrapper around `googleAnalyticsR::ga_meta()`, called with
#' `version = "data"` and the supplied property ID; the result is converted
#' with `tibble::as_tibble()`.
#'
#' @param property_id The ID of the Google Analytics property whose metadata
#'   should be retrieved.
#'
#' @note Authenticate to Google Analytics with `ga_auth()` before calling
#'   this function.
#'
#' @family Google Analytics
#'
#' @examples
#' \dontrun{
#' res <- get_ga_meta_by_id("123456789")
#' head(res)
#' dplyr::glimpse(res)
#' }
#'
#' @return A tibble containing Google Analytics metadata.
#'
#' @export
get_ga_meta_by_id <- function(property_id) {
  property_metadata <- googleAnalyticsR::ga_meta(
    version = "data",
    propertyId = property_id
  )
  tibble::as_tibble(property_metadata)
}

R/get_ga_basic.R

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#' Get Basic Google Analytics Info
#'
#' This function takes a character vector of NIH Core Project Numbers and
#' returns a data frame containing any Google Analytics properties associated
#' with the Core Project Numbers.
#'
#' A property is considered associated with a Core Project Number when one of
#' the `apiName` entries in its metadata (see [get_ga_meta_by_id()]) contains
#' that number, ignoring case.
#'
#' Authentication is attempted in this order:
#' 1. the encrypted service account file shipped in the package's `secret`
#'    directory, when that file exists and the `CFDE_ENCRYPTION_KEY`
#'    environment variable is set;
#' 2. the file at `service_account_json`, when it exists;
#' 3. interactive authentication.
#'
#' @param core_project_numbers A character vector of NIH Core Project Numbers.
#'   Missing and empty values are ignored.
#' @param service_account_json A character string containing the path to a JSON file containing the
#' Google service account credentials. If no file is provided, interactive authentication is used.
#' Defaults to "cfde-access-keyfile.json"
#'
#' @importFrom googleAnalyticsR ga_account_list ga_auth
#' @importFrom purrr map map_chr
#' @importFrom stringr str_remove
#' @importFrom tidyr separate
#' @importFrom rlang .data
#'
#' @return A data frame containing the associated Google Analytics data: the
#'   rows of `ga_account_list("ga4")` that match a requested Core Project
#'   Number, with an added `core_project_num` column (comma-separated when a
#'   property matches more than one value).
#' @export
get_ga_basic <- function(core_project_numbers,
                         service_account_json = "cfde-access-keyfile.json") {
  ## An NA would be pasted into the pattern as the literal "NA" and an empty
  ## string as an empty alternative; both match unrelated metadata, so drop
  ## them before building the regular expression
  project_ids <- as.character(core_project_numbers)
  project_ids <- project_ids[!is.na(project_ids) & nzchar(project_ids)]
  project_ids <- unique(tolower(project_ids))
  if (length(project_ids) == 0) {
    rlang::abort(
      "`core_project_numbers` must contain at least one non-missing Core Project Number."
    )
  }

  ## This function requires authentication, check for existing creds
  packaged_keyfile <- system.file(
    "secret",
    service_account_json,
    package = "programets"
  )
  ## Sys.getenv() returns "" (never NULL) for an unset variable
  has_encryption_key <- nzchar(Sys.getenv("CFDE_ENCRYPTION_KEY"))

  if (file.exists(packaged_keyfile) && has_encryption_key) {
    ## Package Credentials
    programets_service_account <- gargle::secret_decrypt_json(
      path = packaged_keyfile,
      key = "CFDE_ENCRYPTION_KEY"
    )
    googleAnalyticsR::ga_auth(json_file = programets_service_account)
  } else if (file.exists(service_account_json)) {
    ## User SA Credentials
    googleAnalyticsR::ga_auth(json_file = service_account_json)
  } else {
    ## Interactive Auth
    googleAnalyticsR::ga_auth()
  }

  core_project_regex <- stringr::regex(
    paste0(project_ids, collapse = "|"),
    ignore_case = TRUE
  )

  ## Reduce one property's metadata to the requested Core Project Numbers it
  ## mentions: a comma-separated string, or NA when there is no match
  match_core_projects <- function(property_meta) {
    matched <- property_meta |>
      dplyr::filter(stringr::str_detect(.data$apiName, core_project_regex)) |>
      tidyr::separate(
        "apiName",
        into = c("api", "value"),
        sep = ":",
        remove = FALSE
      ) |>
      dplyr::pull("value")
    if (length(matched) == 0) {
      return(NA_character_)
    }
    matched |>
      stringr::str_remove("^cfde_") |>
      unique() |>
      paste(collapse = ",")
  }

  ## Get All Analytics Properties
  account_list <- googleAnalyticsR::ga_account_list("ga4") |>
    dplyr::mutate(
      property_meta = suppressMessages(
        purrr::map(.data$propertyId, get_ga_meta_by_id)
      ),
      core_project_num = purrr::map_chr(.data$property_meta, match_core_projects)
    ) |>
    ## Filter to those with the requested Core Project Numbers
    dplyr::filter(!is.na(.data$core_project_num)) |>
    dplyr::select(-"property_meta")

  if (nrow(account_list) == 0) {
    rlang::inform(rlang::format_error_bullets(c(i = "No Google Analytics properties found for the requested Core Project Numbers")))
  }
  account_list
}

R/nih_reporter.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,8 @@ get_core_project_info <- function(core_project_numbers) {
215215
proj_results_tbl |>
216216
full_join(all_results, by = c('core_project_num' = 'core_project_number', 'appl_id' = 'applid')) |>
217217
filter(.data$core_project_num != "") |>
218-
relocate(.data$core_project_num, .before = .data$appl_id) |>
219-
relocate(.data$found_publication, .after = .data$core_project_num)
218+
relocate('core_project_num', .before = 'appl_id') |>
219+
relocate('found_publication', .after = 'core_project_num')
220220

221221
return(all_results_combined)
222222
}

0 commit comments

Comments
 (0)