|
1 |
| -#' Creates filtered register csv files |
2 |
| -#' |
3 |
| -#' Each csv file is saved in the appropriate output_dir. |
4 |
| -#' |
5 |
| -#' @param filter_by A vector of strings specifying the names of the columns to filter by. |
6 |
| -#' @param register A dataframe representing the register data to be filtered. |
7 |
| -create_filtered_register_csvs <- function(filter_by, register){ |
8 |
| - |
| 1 | +#' Creates filtered CSV files from a register based on specified filters. |
| 2 | +#' |
| 3 | +#' The function processes the register by applying filters specified in `filter_by`. |
| 4 | +#' For "codecheckers", a temporary CSV is loaded and used in place of the register, because the |
| 5 | +#' original register.csv does not have the codechecker column. |
| 6 | +#' The register is then grouped by the filter column, and for each group, a CSV file is generated. |
| 7 | +#' |
| 8 | +#' @param register The register to be filtered. |
| 9 | +#' @param filter_by List of filters to apply (e.g., "venues", "codecheckers"). |
| 10 | +#' |
| 11 | +create_filtered_reg_csvs <- function(register, filter_by){ |
9 | 12 | for (filter in filter_by){
|
10 |
| - column_name <- determine_filter_column_name(filter) |
11 |
| - |
12 |
| - # If filtering by codecheckers, we swap in the register that includes the codechecker |
13 |
| - # columns |
14 | 13 | if (filter == "codecheckers"){
|
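| 14 | + # Using the temporary codechecker register. NOTE(review): `register` is overwritten here, so any filters processed after "codecheckers" also use this register - confirm this is intended |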
15 | 15 | register <- read.csv(CONFIG$DIR_TEMP_REGISTER_CODECHECKER, as.is = TRUE)
|
16 | 16 | # Once the temp_register is loaded, we can remove it
|
17 | 17 | file.remove(CONFIG$DIR_TEMP_REGISTER_CODECHECKER)
|
18 |
| - } |
19 |
| - |
20 |
| - unique_values <- get_unique_values_from_filter(register, column_name) |
21 | 18 |
|
22 |
| - # Filtering the register |
23 |
| - for (value in unique_values) { |
24 |
| - # For filtering by codechecker we need to check if unique value is contained |
25 |
| - # in the list which is the row value. |
26 |
| - if (column_name == "Codechecker"){ |
27 |
| - mask <- sapply(register$Codechecker, function(x) value %in% fromJSON(x)) |
28 |
| - filtered_register <- register[mask, ] |
29 |
| - |
30 |
| - #! Edit depending on whether they want to keep the column |
31 |
| - # Only keeping the column values specified in CONFIG$REGISTER_COLUMNS |
32 |
| - filtered_register <- filtered_register[, names(filtered_register) %in% CONFIG$REGISTER_COLUMNS] |
33 |
| - } |
| 19 | + # Splitting the comma-separated strings into lists |
| 20 | + register$Codechecker <- strsplit(register$Codechecker, ",") |
| 21 | + |
| 22 | + # Unnesting the Codechecker column so that each row holds a single codechecker |
| 23 | + register <- register %>% tidyr::unnest(Codechecker) |
| 24 | + register$Codechecker <- unlist(register$Codechecker) |
| 25 | + } |
34 | 26 |
|
35 |
| - # Else we check against the row value itself |
36 |
| - else{ |
37 |
| - filtered_register <- register[register[[column_name]]==value, ] |
38 |
| - } |
| 27 | + filter_col_name <- CONFIG$FILTER_COLUMN_NAMES[[filter]] |
39 | 28 |
|
40 |
| - output_dir <- paste0(get_output_dir(filter, value), "register.csv") |
41 |
| - |
42 |
| - if (!dir.exists(dirname(output_dir))) { |
43 |
| - dir.create(dirname(output_dir), recursive = TRUE, showWarnings = TRUE) |
44 |
| - } |
45 |
| - |
| 29 | + # Creating groups of csvs |
| 30 | + # Not using the nesting functionality since we want to keep the same columns |
| 31 | + grouped_registers <- register %>% |
| 32 | + group_by(across(all_of(filter_col_name))) |
| 33 | + |
| 34 | + # Split into a list of data frames |
| 35 | + filtered_register_list <- grouped_registers %>% group_split() |
| 36 | + |
| 37 | + # Get the group names (keys) based on the filter names |
| 38 | + register_keys <- grouped_registers %>% group_keys() |
| 39 | + |
| 40 | + # Iterating through each group and generating csv |
| 41 | + for (i in seq_along(filtered_register_list)) { |
| 42 | + # Retrieving the register and its key |
| 43 | + register_key <- register_keys[[filter_col_name]][i] |
| 44 | + filtered_register <- filtered_register_list[[i]] |
| 45 | + table_details <- generate_table_details(register_key, filtered_register, filter) |
| 46 | + filtered_register <- filter_and_drop_register_columns(filtered_register, filter) |
| 47 | + output_dir <- paste0(table_details[["output_dir"]], "register.csv") |
46 | 48 | write.csv(filtered_register, output_dir, row.names=FALSE)
|
47 |
| - } |
48 |
| - } |
49 |
| -} |
50 |
| - |
51 |
| -#' Determines the register table's column name to filter the data by. |
52 |
| -#' |
53 |
| -#' @param filter The filter name |
54 |
| -#' @return The column name to filter by |
55 |
| -determine_filter_column_name <- function(filter) { |
56 |
| - filter_column_name <- switch(filter, |
57 |
| - "venues" = "Type", |
58 |
| - "codecheckers" = "Codechecker", |
59 |
| - NULL # Default case is set to NULL |
60 |
| - ) |
61 |
| - if (is.null(filter_column_name)) { |
62 |
| - stop(paste("Filter", filter, "is not recognized.")) |
63 |
| - } |
64 |
| - |
65 |
| - return(filter_column_name) |
66 |
| -} |
67 |
| - |
68 |
| -get_unique_values_from_filter <- function(register_table, filter_column_name){ |
69 |
| - # Directly retrieve from DICT_ORCID_ID_NAME |
70 |
| - if (filter_column_name == "Codechecker"){ |
71 |
| - unique_values <- names(CONFIG$DICT_ORCID_ID_NAME) |
72 |
| - } |
73 |
| - |
74 |
| - else{ |
75 |
| - unique_values <- unique(register_table[[filter_column_name]]) |
76 | 49 | }
|
77 |
| - return(unique_values) |
78 |
| -} |
79 |
| - |
80 |
| -#' Gets the output dir depending on the filter name and the value of the filtered column |
81 |
| -#' |
82 |
| -#' @param filter The filter name |
83 |
| -#' @param column_value The value of the column the filter applies to |
84 |
| -#' @return The directory to save files to |
85 |
| -get_output_dir <- function(filter, column_value) { |
86 |
| - if (filter=="none"){ |
87 |
| - return(paste0("docs/")) |
88 |
| - } |
89 |
| - |
90 |
| - else if (filter=="venues"){ |
91 |
| - venue_category <- determine_venue_category(column_value) |
92 |
| - # In case the venue_category itself has no further subgroups we do not need subgroups |
93 |
| - if (is.null(venue_category)){ |
94 |
| - return(paste0("docs/", filter, "/", gsub(" ", "_", column_value), "/")) |
95 |
| - } |
96 |
| - |
97 |
| - # Removing the venue category and the parentheses to obtain the venue name |
98 |
| - venue_name <- determine_venue_name(column_value, venue_category) |
99 |
| - return(paste0("docs/", filter, "/", venue_category, "/", venue_name, "/")) } |
100 |
| - |
101 |
| - else if (filter=="codecheckers"){ |
102 |
| - # The codechecker column is always a list of codecheckers |
103 |
| - for (codechecker in column_value){ |
104 |
| - return(paste0("docs/", filter, "/", gsub(" ", "_", codechecker), "/")) |
105 |
| - } |
106 |
| - } |
107 |
| - |
108 |
| - else{ |
109 |
| - return(paste0("docs/", filter, "/", gsub(" ", "_", tolower(column_value)), "/")) |
110 |
| - } |
111 |
| -} |
112 |
| - |
113 |
| -#' Determines the venue category based on the venue_name |
114 |
| -#' |
115 |
| -#' @param venue_name The venue_name obtained from the "Type" column of the register |
116 |
| -#' @return The venue category. If the venue does not belong to any category NULL is returned |
117 |
| -determine_venue_category <- function(venue_name){ |
118 |
| - list_venue_categories <- CONFIG$FILTER_SUBCATEGORIES[["venues"]] |
119 |
| - for (category in list_venue_categories){ |
120 |
| - if (grepl(category, venue_name, ignore.case=TRUE)) { |
121 |
| - return(category) |
122 |
| - } |
123 |
| - } |
124 |
| - warning(paste("Register venue", venue_name, "does not fall into any of the following venue categories:", toString(list_venue_categories))) |
125 |
| - return(NULL) |
126 |
| -} |
127 |
| - |
128 |
| -determine_venue_name <- function(unfiltered_venue_name, venue_category){ |
129 |
| - if (is.null(venue_category)){ |
130 |
| - return(NULL) |
131 |
| - } |
132 |
| - |
133 |
| - venue_name <- trimws(gsub("[()]", "", gsub(venue_category, "", unfiltered_venue_name, ignore.case = TRUE))) |
134 |
| - venue_name <- gsub(" ", "_", venue_name) |
135 |
| - return(venue_name) |
| 50 | + } |
136 | 51 | }
|
0 commit comments