Skip to content

Commit fefe20a

Browse files
Merge pull request #55 from angelina-momin/split-register-type-column
Split register type column
2 parents d80822c + 40afe17 commit fefe20a

File tree

78 files changed

+1528
-1348
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+1528
-1348
lines changed

DESCRIPTION

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: codecheck
22
Title: Helper Functions for CODECHECK Project
3-
Version: 0.6.0
3+
Version: 0.7.0
44
Authors@R:
55
c(person(given = "Stephen",
66
family = "Eglen",
@@ -27,6 +27,9 @@ Description: This contains helper functions for CODECHECKERS (<https://codecheck
2727
License: MIT + file LICENSE
2828
Depends: R (>= 4.0.0), gh, R.cache, parsedate
2929
Imports:
30+
dplyr,
31+
tidyr,
32+
magrittr,
3033
yaml,
3134
xtable,
3235
stringr,
@@ -41,7 +44,7 @@ Imports:
4144
whisker
4245
Encoding: UTF-8
4346
LazyData: true
44-
RoxygenNote: 7.3.1
47+
RoxygenNote: 7.3.2
4548
VignetteBuilder: knitr
4649
Suggests:
4750
tinytest,

R/codecheck.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ create_zenodo_record <- function(zen) {
286286
this_doi = myrec$metadata$prereserve_doi$doi
287287
cat("The following URL is your Zenodo DOI.\n")
288288
cat("Please add this to codecheck.yml in report: field\n")
289-
print(paste0("https://doi.org/", this_doi))
289+
print(paste0(CONFIG$HYPERLINKS[["doi"]], this_doi))
290290
cat("Remember to reload the yaml file after editing it.\n")
291291
get_zenodo_record(this_doi)
292292
}
@@ -351,7 +351,8 @@ set_zenodo_metadata <- function(zen, record, metadata) {
351351
draft$addRelatedIdentifier(relation = "isSupplementTo", identifier = metadata$paper$reference)
352352

353353
draft <- zen$depositRecord(draft)
354-
cat(paste0("Check your record online at https://zenodo.org/deposit/",
354+
cat(paste0("Check your record online at",
355+
CONFIG$HYPERLINKS[["zenodo_deposit"]],
355356
record,
356357
"\n"))
357358
}

R/configuration.R

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,11 @@ get_codecheck_yml_osf <- function(x) {
7878
#' @importFrom httr GET content
7979
#' @importFrom yaml yaml.load
8080
get_codecheck_yml_gitlab <- function(x) {
81-
response <- httr::GET(paste0("https://gitlab.com/", x, "/-/raw/main/codecheck.yml?inline=false"))
81+
# Loading config.R file which is needed for the hyperlink
82+
source(system.file("extdata", "config.R", package = "codecheck"))
83+
84+
link <- paste0(CONFIG$HYPERLINKS[["gitlab"]], x, "/-/raw/main/codecheck.yml?inline=false")
85+
response <- httr::GET(link)
8286

8387
if (response$status == 200) {
8488
content <- httr::content(response, as = "text", encoding = "UTF-8")

R/register.R

Lines changed: 6 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -21,45 +21,20 @@
2121
register_render <- function(register = read.csv("register.csv", as.is = TRUE),
2222
filter_by = c("venues", "codecheckers"),
2323
outputs = c("html", "md", "json")) {
24+
# Loading the dplyr package, otherwise we can't use "%>%"
25+
library(dplyr)
26+
2427
# Loading config.R file
2528
source(system.file("extdata", "config.R", package = "codecheck"))
2629

2730
register_table <- preprocess_register(register, filter_by)
28-
2931
# Setting number of codechecks now for later use. This is done to avoid double counting codechecks
3032
# done by multiple authors.
3133
CONFIG$NO_CODECHECKS <- nrow(register_table)
3234

33-
# Creating a list of register tables with indices being the filter types
34-
list_register_tables <- c()
35-
36-
# Adding the original register table. We drop the columns that are not in CONFIG$REGISTER_COLUMNS as
37-
# some of them may have been added in the preprocessing for the sake of filtering
38-
og_register_table <- register_table[, names(register_table) %in% CONFIG$REGISTER_COLUMNS]
39-
list_register_tables[["none"]] <- list("original"= og_register_table)
40-
41-
if (length(filter_by)!=0){
42-
create_filtered_register_csvs(filter_by, register)
43-
# Creating and adding filtered register tables to the list of tables
44-
list_register_tables <- add_filtered_register_tables(list_register_tables, register_table, filter_by)
45-
}
46-
47-
# Rendering files
48-
if ("md" %in% outputs) render_register_mds(list_register_tables)
49-
if ("html" %in% outputs) {
50-
render_register_htmls(list_register_tables)
51-
52-
for (filter in filter_by){
53-
render_non_register_htmls(list_register_tables[[filter]], page_type = filter)
54-
}
55-
}
56-
if ("json" %in% outputs) {
57-
render_register_jsons(list_register_tables)
58-
59-
for (filter in filter_by){
60-
render_non_register_jsons(list_register_tables[[filter]], page_type = filter)
61-
}
62-
}
35+
create_filtered_reg_csvs(register, filter_by)
36+
create_register_files(register_table, filter_by, outputs)
37+
create_non_register_files(register_table, filter_by)
6338

6439
return(register_table)
6540
}
Lines changed: 40 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -1,136 +1,51 @@
1-
#' Creates filtered register csv files
2-
#'
3-
#' Each csv file is saved in the appropriate output_dir.
4-
#'
5-
#' @param filter_by A vector of strings specifying the names of the columns to filter by.
6-
#' @param register A dataframe representing the register data to be filtered.
7-
create_filtered_register_csvs <- function(filter_by, register){
8-
1+
#' Creates filtered CSV files from a register based on specified filters.
2+
#'
3+
#' The function processes the register by applying filters specified in `filter_by`.
4+
#' For "codecheckers", a temporary CSV is loaded and processed as the original register.csv
5+
#' does not have the codechecker column.
6+
#' The register is then grouped by the filter column, and for each group, a CSV file is generated.
7+
#'
8+
#' @param register The register to be filtered.
9+
#' @param filter_by List of filters to apply (e.g., "venues", "codecheckers").
10+
#'
11+
create_filtered_reg_csvs <- function(register, filter_by){
912
for (filter in filter_by){
10-
column_name <- determine_filter_column_name(filter)
11-
12-
# If filtered by codecheckers we replace the register with the register with codechecker
13-
# columns
1413
if (filter == "codecheckers"){
14+
# Using the temporary codechecker register
1515
register <- read.csv(CONFIG$DIR_TEMP_REGISTER_CODECHECKER, as.is = TRUE)
1616
# Once the temp_register is loaded, we can remove it
1717
file.remove(CONFIG$DIR_TEMP_REGISTER_CODECHECKER)
18-
}
19-
20-
unique_values <- get_unique_values_from_filter(register, column_name)
2118

22-
# Filtering the register
23-
for (value in unique_values) {
24-
# For filtering by codechecker we need to check if unique value is contained
25-
# in the list which is the row value.
26-
if (column_name == "Codechecker"){
27-
mask <- sapply(register$Codechecker, function(x) value %in% fromJSON(x))
28-
filtered_register <- register[mask, ]
29-
30-
#! Edit depending on whether they want to keep the column
31-
# Only keeping the column values specified in CONFIG$REGISTER_COLUMNS
32-
filtered_register <- filtered_register[, names(filtered_register) %in% CONFIG$REGISTER_COLUMNS]
33-
}
19+
# Splitting the comma-separated strings into lists
20+
register$Codechecker <- strsplit(register$Codechecker, ",")
21+
22+
# Unnesting the Codechecker list column so that each codechecker gets its own row
23+
register <- register %>% tidyr::unnest(Codechecker)
24+
register$Codechecker <- unlist(register$Codechecker)
25+
}
3426

35-
# Else we check against the row value itself
36-
else{
37-
filtered_register <- register[register[[column_name]]==value, ]
38-
}
27+
filter_col_name <- CONFIG$FILTER_COLUMN_NAMES[[filter]]
3928

40-
output_dir <- paste0(get_output_dir(filter, value), "register.csv")
41-
42-
if (!dir.exists(dirname(output_dir))) {
43-
dir.create(dirname(output_dir), recursive = TRUE, showWarnings = TRUE)
44-
}
45-
29+
# Creating groups of csvs
30+
# Not using the nesting functionality since we want to keep the same columns
31+
grouped_registers <- register %>%
32+
group_by(across(all_of(filter_col_name)))
33+
34+
# Split into a list of data frames
35+
filtered_register_list <- grouped_registers %>% group_split()
36+
37+
# Get the group names (keys) based on the filter names
38+
register_keys <- grouped_registers %>% group_keys()
39+
40+
# Iterating through each group and generating csv
41+
for (i in seq_along(filtered_register_list)) {
42+
# Retrieving the register and its key
43+
register_key <- register_keys[[filter_col_name]][i]
44+
filtered_register <- filtered_register_list[[i]]
45+
table_details <- generate_table_details(register_key, filtered_register, filter)
46+
filtered_register <- filter_and_drop_register_columns(filtered_register, filter)
47+
output_dir <- paste0(table_details[["output_dir"]], "register.csv")
4648
write.csv(filtered_register, output_dir, row.names=FALSE)
47-
}
48-
}
49-
}
50-
51-
#' Determines the register table's column name to filter the data by.
52-
#'
53-
#' @param filter The filter name
54-
#' @return The column name to filter by
55-
determine_filter_column_name <- function(filter) {
56-
filter_column_name <- switch(filter,
57-
"venues" = "Type",
58-
"codecheckers" = "Codechecker",
59-
NULL # Default case is set to NULL
60-
)
61-
if (is.null(filter_column_name)) {
62-
stop(paste("Filter", filter, "is not recognized."))
63-
}
64-
65-
return(filter_column_name)
66-
}
67-
68-
get_unique_values_from_filter <- function(register_table, filter_column_name){
69-
# Directly retrieve from CONFIG$DICT_ORCID_ID_NAME
70-
if (filter_column_name == "Codechecker"){
71-
unique_values <- names(CONFIG$DICT_ORCID_ID_NAME)
72-
}
73-
74-
else{
75-
unique_values <- unique(register_table[[filter_column_name]])
7649
}
77-
return(unique_values)
78-
}
79-
80-
#' Gets the output dir depending on the filter name and the value of the filtered column
81-
#'
82-
#' @param filter The filter name
83-
#' @param column_value The value of the column the filter applies to
84-
#' @return The directory to save files to
85-
get_output_dir <- function(filter, column_value) {
86-
if (filter=="none"){
87-
return(paste0("docs/"))
88-
}
89-
90-
else if (filter=="venues"){
91-
venue_category <- determine_venue_category(column_value)
92-
# In case the venue_category itself has no further subgroups we do not need subgroups
93-
if (is.null(venue_category)){
94-
return(paste0("docs/", filter, "/", gsub(" ", "_", column_value), "/"))
95-
}
96-
97-
# Removing the venue category to obtain the venue name and replace the brackets
98-
venue_name <- determine_venue_name(column_value, venue_category)
99-
return(paste0("docs/", filter, "/", venue_category, "/", venue_name, "/")) }
100-
101-
else if (filter=="codecheckers"){
102-
# The codechecker column is always a list of codecheckers
103-
for (codechecker in column_value){
104-
return(paste0("docs/", filter, "/", gsub(" ", "_", codechecker), "/"))
105-
}
106-
}
107-
108-
else{
109-
return(paste0("docs/", filter, "/", gsub(" ", "_", tolower(column_value)), "/"))
110-
}
111-
}
112-
113-
#' Determines the venue category based on the venue_name
114-
#'
115-
#' @param venue_name The venue_name obtained from the "Type" column of the register
116-
#' @return The venue category. If the venue does not belong to any category NULL is returned
117-
determine_venue_category <- function(venue_name){
118-
list_venue_categories <- CONFIG$FILTER_SUBCATEGORIES[["venues"]]
119-
for (category in list_venue_categories){
120-
if (grepl(category, venue_name, ignore.case=TRUE)) {
121-
return(category)
122-
}
123-
}
124-
warning(paste("Register venue", venue_name, "does not fall into any of the following venue categories:", toString(list_venue_categories)))
125-
return(NULL)
126-
}
127-
128-
determine_venue_name <- function(unfiltered_venue_name, venue_category){
129-
if (is.null(venue_category)){
130-
return(NULL)
131-
}
132-
133-
venue_name <- trimws(gsub("[()]", "", gsub(venue_category, "", unfiltered_venue_name, ignore.case = TRUE)))
134-
venue_name <- gsub(" ", "_", venue_name)
135-
return(venue_name)
50+
}
13651
}

R/utils_filter_register_table.R

Lines changed: 0 additions & 52 deletions
This file was deleted.

R/utils_preprocess_register.R

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ add_issue_number_links <- function(register_table, register) {
4747
paste0(
4848
"[",
4949
issue_id,
50-
"](https://github.com/codecheckers/register/issues/",
50+
"](",
51+
CONFIG$HYPERLINKS[["codecheck_issue"]],
5152
issue_id, ")"
5253
)
5354
} else {
@@ -95,7 +96,7 @@ add_codechecker <- function(register_table, register) {
9596
for (i in seq_len(nrow(register))) {
9697
config_yml <- get_codecheck_yml(register[i, ]$Repo)
9798

98-
codechecker_info <- list()
99+
codechecker_info <- c()
99100
if (!is.null(config_yml) && !is.null(config_yml$codechecker)) {
100101
# For each codechecker we enter the data in the form {name} (orcid id: {orcid_id})
101102
for (codechecker in config_yml$codechecker) {
@@ -113,9 +114,17 @@ add_codechecker <- function(register_table, register) {
113114
return(register_table)
114115
}
115116

117+
#' Creates a temporary CSV register with a "Codechecker" column.
118+
#'
119+
#' The function flattens the "Codechecker" column and saves the resulting table
120+
#' as a temporary CSV file. This temporary CSV is needed to filter the registers
121+
#' by codecheckers.
122+
#'
123+
#' @param register_table The register table with a "Codechecker" column.
116124
create_temp_register_with_codechecker <- function(register_table){
117-
# Apply toJSON to each element in the `Codechecker` column
118-
register_table$Codechecker <- sapply(register_table$Codechecker, toJSON, auto_unbox = TRUE)
125+
# Flatten the Codechecker column (convert list elements to comma-separated strings)
126+
# This is done since CSV files cannot store list columns directly
127+
register_table$Codechecker <- sapply(register_table$Codechecker, function(x) paste(x, collapse = ","))
119128
write.csv(register_table, CONFIG$DIR_TEMP_REGISTER_CODECHECKER)
120129
}
121130

R/utils_register_check.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ check_issue_status <- function(entry) {
3939
)
4040
if (issue$state != "closed") {
4141
warning(
42-
entry$Certificate, " issue is still open: ",
43-
"<https://github.com/codecheckers/register/issues/",
42+
entry$Certificate, " issue is still open: <",
43+
CONFIG$HYPERLINKS[["codecheck_issue"]],
4444
entry$Issue, ">"
4545
)
4646
}

0 commit comments

Comments
 (0)