diff --git a/.Rprofile b/.Rprofile new file mode 100644 index 0000000..81b960f --- /dev/null +++ b/.Rprofile @@ -0,0 +1 @@ +source("renv/activate.R") diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/JABS_nextflow_postprocess.Rproj b/JABS_nextflow_postprocess.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/JABS_nextflow_postprocess.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/NextFlow_Output_QC_Postprocess_1.R b/NextFlow_Output_QC_Postprocess_1.R index c109085..6da1a81 100644 --- a/NextFlow_Output_QC_Postprocess_1.R +++ b/NextFlow_Output_QC_Postprocess_1.R @@ -1,454 +1,370 @@ -#A script to clean NextFlow outputs for final processing -#Developed by Dr. Jake Beierle (don't forget the Dr., it's important) - -#----Documentation---- -#See comprehensive documentation on the github repository -#https://github.com/jacobbeierle/JABS_nextflow_postprocess/tree/main - -#Import libraries--------------------------------------------------------------- -library(tidyverse) -library(writexl) -library(janitor) -library(data.table) -options(error = NULL) #helps with error handling in functions checking for directories and filenames -#########################################################Set QC and other Values------------------------------------------------------------------ - -##Set your working directory -#working.directory <- "C:\\Users\\beierj\\Desktop\\2025-04-09_NTG_C1-C5_Analysis" - -##Set the QC values you will use to screen the Nextflow QC files -#Expected length of clip in frames (video clipping duration plus 5 seconds) -expected.length <- 60*60*30 + 5*30 -#Max number of tracklets/hour recording -max.tracklets.per.hour <- 6 -#Max frames (as percent of all frames) missing pose -max.percent.segmentation.missing <- 0.2 -#Max percentage of Frames missing pose -max.percent.pose.missing <- 0.005 -#Max percentage of Frames missing pose -max.percent.kp.missing <- 0.01 - -#Proportion of Highest and lowest fecal boli mice to plot separately for QC -fecal_boli_percent_threshold <- 0.05 - - -#Create functions--------------------------------------------------------------- -#Creates a directory if it does not already exist -directory_check_creation <- function(x){ - x <- paste0(x) - if(!dir.exists(x)){ - dir.create(x) - } - else{print(paste0("'/", substitute(x), "' already exists"))} -} - - -#Set working directory and create qc directory---------------------------------- - -#If you have defined a working directory above, set it here -if(exists("working.directory")){ - setwd(working.directory) -} - -#Check to ensure there is a nexflow ouput folder -if(!dir.exists("Nextflow_Output")){ - stop("YOU DO NOT HAVE A 'Nextflow_Output' DIRECTORY IN YOUR WORKING DIRECTORY /n - YOU NEED TO CREATE A 'Nextflow_Output' DIRECTORY WITH YOUR RESULTS") -}else{ - directory_check_creation(file.path("Nextflow_Output", "final_nextflow_feature_data")) - final.dataframes.dir <- file.path("Nextflow_Output", "final_nextflow_feature_data") -} - - -#create QC directories if not made, programatically define these directories for publishing -directory_check_creation("qc") -#directory for QC logs -directory_check_creation(file.path("qc", "nextflow_qc_logs")) -qc.log.dir <- file.path("qc", "nextflow_qc_logs") -#directory for missing or duplicated data logs -directory_check_creation(file.path("qc", "missing_or_dup_data")) -qc.missing_dup.dir <- file.path("qc", "missing_or_dup_data") -#directory for QC figures -directory_check_creation(file.path("qc", "qc_figs")) -qc.figs.dir <- file.path("qc", "qc_figs") - - - - -#Process and Publish QC logs with success or failure annotated in a CSV------------------------- - -#read QC files in NextFlow_Output directory -qc_log <- list.files( - path = "NextFlow_Output/", - pattern = "qc_batch_", - full.names = TRUE, - recursive = TRUE - ) |> - read_csv(id = "QC_file") - -#Create subset of data failing QC measures - -#Record why QC failed for each video -qc_log$passed_duration_QC <- qc_log$video_duration == expected.length -qc_log$passed_tracklet_QC <- qc_log$pose_tracklets < max.tracklets.per.hour*(expected.length/108000) -qc_log$passed_segmentation_QC <- qc_log$seg_counts > (1-max.percent.segmentation.missing) * expected.length -qc_log$passed_pose_QC <- qc_log$pose_counts > (1-max.percent.pose.missing) * expected.length -qc_log$passed_kp_QC <- qc_log$missing_keypoint_frames < max.percent.kp.missing * expected.length - -#Apply thresholds defined above to create a seperate 'failed QC' data frame -qc_log.failed <- subset(qc_log, video_duration != expected.length | - pose_tracklets > max.tracklets.per.hour*(expected.length/108000) | - seg_counts < (1-max.percent.segmentation.missing) * expected.length | - pose_counts < (1-max.percent.pose.missing) * expected.length| - missing_keypoint_frames > max.percent.kp.missing * expected.length) - -#Write final Nextflow QC files for review by a human -write.csv(qc_log, file.path(qc.log.dir , "qc_all.csv"), row.names = FALSE) -write.csv(qc_log.failed, file.path(qc.log.dir,"qc_failed.csv"), row.names = FALSE) - - -#Create a list of expected videos that would be in each Nextflow output--------------------------------------------------------- -#Create a dataframe of videos in QC log -expected_vidoes <- as.data.frame(gsub("_with_fecal_boli", ".avi", qc_log$video_name)) -expected_vidoes <- as.data.frame(gsub("_filtered", "", expected_vidoes[,1])) -colnames(expected_vidoes) <- "NetworkFilename" -#Make sure there are no duplicates -expected_vidoes <- distinct(expected_vidoes, NetworkFilename) - - -#Process Fecal Boli Data and publish QC plots-------------------------------------------------------- -fecal_boli.raw <- list.files( - path = "NextFlow_Output/", - pattern = "fecal_boli.csv", - recursive = TRUE, - full.names = TRUE) |> - read_csv() |> - select(-nextflow_version) - -colnames(fecal_boli.raw)[1] <- "NetworkFilename" -#Correct some discrepancies that may have arose in file names due to corner correction workflow -fecal_boli.raw$NetworkFilename <- gsub("_corrected", "", fecal_boli.raw$NetworkFilename) -fecal_boli.raw$NetworkFilename <- gsub("_filtered", "", fecal_boli.raw$NetworkFilename) - -#Check for missing data in fecal boli data based on QC files -fecal_boli_videos_missing_in_qc <- setdiff(expected_vidoes$NetworkFilename, fecal_boli.raw$NetworkFilename) -videos_with_missing_fecal_boli <- setdiff(fecal_boli.raw$NetworkFilename, expected_vidoes$NetworkFilename) - - -#Output warning and prepare to report report CSV if files are missing in QC log, but present in Gait analysis -if(length(fecal_boli_videos_missing_in_qc!=0)){ - fecal_boli_videos_missing_in_qc$fboli <- 1 -} - -#Output warning and report CSV if files are missing in QC log, but present in Gait analysis -if(length(videos_with_missing_fecal_boli!=0)){ - videos_with_missing_fecal_boli$missing_fboli <- 1 -} - -#Check for fecal boli rows with identical data (i.e. something went wrong in video recording) -#I remove the network file name col because data may have been misslabeled -#This is used to output QC measures in the final portion of this script -#There are many more duplicate rows here, and not nessisarily cause for alarm -duplicate_fboli_rows <- fecal_boli.raw[duplicated(fecal_boli.raw[-1]) | duplicated(fecal_boli.raw[-1], fromLast = TRUE), ] - -#Pivot longer to facilitate plotting for QC -fecal_boli.plot <- fecal_boli.raw |> - pivot_longer( - cols = !NetworkFilename, - names_to = "min", - values_to = "fecal_boli", - values_drop_na = TRUE) |> - mutate(min = parse_number(min)) - -#Plot fecal boli QC measures -outFileNamePDF <- file.path(qc.figs.dir, "fecal_boli_qc_figs.pdf") -pdf(outFileNamePDF, 6, 6) - -#Growth curve for all mice -p1 <- ggplot(fecal_boli.plot, aes(min, fecal_boli, group = NetworkFilename, colour = NetworkFilename))+ - geom_line() + - labs(title = "Fecal boli growth, all mice") + - theme(legend.position = "none") -print(p1) - -#Plot 10% mice with lowest fecal boli -p1 <- fecal_boli.plot |> - summarise(across(fecal_boli, max), .by = NetworkFilename) |> - slice_min(fecal_boli, prop = fecal_boli_percent_threshold) |> - select(NetworkFilename) |> - merge(fecal_boli.plot, by.x = "NetworkFilename") |> - ggplot(aes(min, fecal_boli, group = NetworkFilename, colour = NetworkFilename))+ - geom_line() + - labs(title = paste("Lowest ", fecal_boli_percent_threshold*100, "% of fecal boli mice", sep = "")) + - theme(legend.position = "none") -print(p1) - -#Plot 10% mice with highest fecal boli -p1 <- fecal_boli.plot |> - summarise(across(fecal_boli, max), .by = NetworkFilename) |> - slice_max(fecal_boli, prop = fecal_boli_percent_threshold) |> - select(NetworkFilename) |> - merge(fecal_boli.plot, by.x = "NetworkFilename") |> - ggplot(aes(min, fecal_boli, group = NetworkFilename, colour = NetworkFilename))+ - geom_line() + - labs(title = paste("Highest ", fecal_boli_percent_threshold*100, "% of fecal boli mice", sep = "")) + - theme(legend.position = "none") -print(p1) - -#Histogram of final fecal boli count -p1 <- fecal_boli.plot |> - arrange(desc(min)) |> - distinct(NetworkFilename, .keep_all = TRUE) |> - ggplot(aes(fecal_boli, ifelse(after_stat(count) > 0, after_stat(count), NA)))+ - geom_histogram(binwidth = 1, boundary = 0)+ - labs(title = "Fecal boli highest bin, all mice") + - ylab("count") -print(p1) - -dev.off() - -#Write out all raw, merged fecal boli counts -write.csv(fecal_boli.raw, file.path(final.dataframes.dir, "fecal_boli_raw.csv"), row.names = FALSE) - -#Process Gait Data-------------------------------------------------------------- - -#Import Gait Data -gait.raw <- list.files( - path = "NextFlow_Output/", - pattern = "gait.csv", - full.names = TRUE, - recursive = TRUE) |> - read_csv() |> - select(-nextflow_version) -colnames(gait.raw)[1] <- "NetworkFilename" - - -#Amend video path information to match other Nextflow outputs, making merging easier later -gait.raw$NetworkFilename <- sub(".", "", gait.raw$NetworkFilename) - -#Check for missing data in Gait and QC files -gait_videos_missing_in_qc <- setdiff(expected_vidoes$NetworkFilename, gait.raw$NetworkFilename) -videos_with_all_gait_missing <- setdiff(gait.raw$NetworkFilename, expected_vidoes$NetworkFilename) - -#Output warning and report CSV if files are missing in QC log, but present in Gait analysis -if(length(gait_videos_missing_in_qc!=0)){ - gait_videos_missing_in_qc$gait <- 1 -} - - -#Extract values repeated identically for each time bin (i.e. the cols identified below) and reduce to one per video -gait.duplicated_data <- gait.raw |> - select(c(NetworkFilename, `Distance Traveled`, `Body Length`, Speed, `Speed Variance`)) |> - distinct(NetworkFilename, .keep_all = TRUE) - -#Unmelt the data and add speed bin to col names -#Remove duplicated measures handled above -gait.speed_bin_data <- gait.raw |> - select(!c(`Distance Traveled`, `Body Length`, Speed, `Speed Variance`)) - -#Remove variance measures from speed bins with fewer than 3 strides -#These do not make statistical sense to report -gait.speed_bin_data[gait.speed_bin_data$`Stride Count` < 2, colnames(gait.speed_bin_data) %like% "Variance"] <- NA - -#"Unmelt" non-duplicated data by speed bin -gait.speed_bin_data <- dcast(as.data.table(gait.speed_bin_data), - formula = NetworkFilename ~ gait.speed_bin_data$'Speed Bin', - value.var = colnames(gait.speed_bin_data)[3:(ncol(gait.speed_bin_data)-1)], - sep = ".") - -#Merge binned and duplicated measur1es -gait.merged <- merge(gait.duplicated_data, gait.speed_bin_data, by = "NetworkFilename") - -#Select all unique NetworkFilenames in the qc Log, and create empty rows unrepresented in the gait features -gait.final <- merge(expected_vidoes, gait.merged, by = "NetworkFilename", all = TRUE) -gait.final[is.na(gait.final$`Stride Count.10`) , 'Stride Count.10'] <- 0 -gait.final[is.na(gait.final$`Stride Count.15`) , 'Stride Count.15'] <- 0 -gait.final[is.na(gait.final$`Stride Count.20`) , 'Stride Count.20'] <- 0 -gait.final[is.na(gait.final$`Stride Count.25`) , 'Stride Count.25'] <- 0 - -#Remove Speed bin cols, which are no longer useful -gait.final <- select(gait.final, !contains('Speed Bin')) - -#Check for gait rows with identical data (i.e. something went wrong in video recording) -#This is used to output QC measures in the final portion of this script -duplicate_gait_rows <- as_tibble(gait.final[duplicated(gait.final[-1]) | duplicated(gait.final[-1], fromLast = TRUE), ]) - -#output to final CSV -write.csv(gait.final, file.path(final.dataframes.dir,"gait_final.csv"), row.names = FALSE) - - -#Process JABS Feature Data NEEDS DEVELOPMENT------------------------------------ -JABS.features <- list.files( - path = "NextFlow_Output/", - pattern = "features.csv", - recursive = TRUE, - full.names = TRUE) |> - read_csv() |> - select(-nextflow_version) -colnames(JABS.features)[1] <- "NetworkFilename" - -#Adjust networkfile names - -JABS.features$NetworkFilename <- paste0("/", JABS.features$NetworkFilename, ".avi") -JABS.features$NetworkFilename <- gsub("_corrected", "", JABS.features$NetworkFilename) -JABS.features$NetworkFilename <- gsub("_filtered", "", JABS.features$NetworkFilename) - - -#Check for missing data in Gait and QC files -JABS_features_videos_missing_in_qc <- setdiff(expected_vidoes$NetworkFilename, JABS.features$NetworkFilename) -videos_with_JABS_features_missing <- setdiff(JABS.features$NetworkFilename, expected_vidoes$NetworkFilename) - -#Output warning and prepare to report report CSV if files are missing in QC log, but present in Gait analysis -if(length(JABS_features_videos_missing_in_qc!=0)){JABS_features_videos_missing_in_qc$JABS_features <- 1} - -#Output warning and report CSV if files are missing in QC log, but present in Gait analysis -if(length(videos_with_JABS_features_missing!=0)){videos_with_JABS_features_missing$JABS_features <- 1} - - -#Check for JABs feature rows with identical data (i.e. something went wrong in video recording) -#This is used to output QC measures in the final portion of this script -#JABS features are rounded to the nearest tenth because of minor differences in estimates -duplicate_JABS_feature_rows <- JABS.features[duplicated(sapply(JABS.features[-1], \(x) round(x, digits = 0))) | - duplicated(sapply(JABS.features[-1], \(x) round(x, digits = 0)), fromLast = TRUE), ] -#Write the final csv -write.csv(JABS.features, file.path(final.dataframes.dir, "JABS_features_final.csv"), row.names = FALSE) - - -#Process morphometrics feature data--------------------------------------------- -morpho.raw <- list.files( - path = "NextFlow_Output/", - pattern = "morphometrics.csv", - full.names = TRUE, - recursive = TRUE) |> - read_csv() |> - relocate(NetworkFilename) |> - select(!c(nextflow_version)) - -#reformat NetworkFilename -morpho.raw$NetworkFilename <- sub(".", "", morpho.raw$NetworkFilename) - -#Check for missing data in Gait and QC files -morphometrics_videos_missing_in_qc <- setdiff(expected_vidoes$NetworkFilename, morpho.raw$NetworkFilename) -videos_with_morphometrics_features_missing <- setdiff(morpho.raw$NetworkFilename, expected_vidoes$NetworkFilename) - -#Output warning and prepare to report report CSV if files are missing in QC log, but present in Gait analysis -if(length(morphometrics_videos_missing_in_qc!=0)){morphometrics_videos_missing_in_qc$morpho_features <- 1} - -#Output warning and report CSV if files are missing in QC log, but present in Gait analysis -if(length(videos_with_morphometrics_features_missing!=0)){videos_with_morphometrics_features_missing$morpho_features <- 1} - - -#Check for JABs feature rows with identical data (i.e. something went wrong in video recording) -#This is used to output QC measures in the final portion of this script -#JABS features are rounded to the nearest tenth because of minor differences in estimates -duplicate_morphometrics_rows <- morpho.raw[duplicated(morpho.raw[-1]) | duplicated(morpho.raw[-1], fromLast = TRUE), ] - - -write.csv(morpho.raw, file.path(final.dataframes.dir, "morphometrics_final.csv"), row.names = FALSE) - - - -#Merge and output all data types missing for videos----- - -#Publish all vids with missing data -#Combine into a single list -all_missing_data <- list( - "videos_with_missing_fecal_boli" = videos_with_missing_fecal_boli, - "videos_with_JABS_features_missing" = videos_with_JABS_features_missing, - "videos_with_morphometrics_features_missing" = videos_with_morphometrics_features_missing -) - -#Select the dfs with actual missing data represented, dropping the rest -publish_missing_data <- NULL -for(i in seq_along(all_missing_data)){ - if(length(all_missing_data[[i]]) > 0){ - if(length(publish_missing_data) == 0){ - publish_missing_data <- all_missing_data[[i]] - }else{publish_missing_data <- full_join(publish_missing_data, all_missing_data[[i]])} - } -} - -#Fill the csv with something if all QC passes -if(length(publish_missing_data) == 0){ - publish_missing_data <- "NO DATA MISSING" -} - -#Write the csv -write.csv(publish_missing_data, file.path(qc.missing_dup.dir, "missing_data.csv"), row.names = FALSE) - - -#Publish all vids in some feature csv, but not in the qc dataframe -#Combine into a single list -videos_not_in_qc_report <- list( - "fecal_boli_videos_missing_in_qc" = fecal_boli_videos_missing_in_qc, - "gait_videos_missing_in_qc" = gait_videos_missing_in_qc, - "JABS_features_videos_missing_in_qc" = JABS_features_videos_missing_in_qc, - "morphometrics_videos_missing_in_qc" = morphometrics_videos_missing_in_qc -) - -#Select the dfs with actual missing data represented, dropping the rest -publish_videos_not_in_qc_report <- NULL -for(i in seq_along(videos_not_in_qc_report)){ - if(length(videos_not_in_qc_report[[i]]) > 0){ - if(length(publish_videos_not_in_qc_report) == 0){ - publish_missing_data <- videos_not_in_qc_report[[i]] - }else{publish_missing_data <- full_join(publish_missing_data, videos_not_in_qc_report[[i]])} - - }else{} -} - -#Fill the csv with something if all QC passes -if(length(publish_videos_not_in_qc_report) == 0){ - publish_videos_not_in_qc_report <- "NO DATA MISSING" -} -write.csv(publish_videos_not_in_qc_report, file.path(qc.missing_dup.dir, "videos_not_in_qc_report.csv"), row.names = FALSE) - - - -#Output data that is duplicated in the data frames -duplicated_data <- list( - "dup_gait" = duplicate_gait_rows, - "dup_JABS" = duplicate_JABS_feature_rows, - "dup_morpho" = duplicate_morphometrics_rows, - "dup_fboli" = duplicate_fboli_rows -) - -#If all objecst in the list have a length of 0 (i.e. empty), report no dupli -if(all(sapply(duplicated_data, function(x) nrow(x)==0))){ - duplicated_data <- "NO DUPLICATED DATA" - write_xlsx(as.data.frame(duplicated_data), path = file.path(qc.missing_dup.dir, "duplicated_data.xlsx")) -}else{ - write_xlsx(duplicated_data, path = file.path(qc.missing_dup.dir, "duplicated_data.xlsx")) - -} - - -#Write warnings for failed QC-------------------------------------------------------------- -error.reporting <- NULL - -#Report to terminal if data is missing from QC log but in feature tables -if(!is.character(publish_videos_not_in_qc_report)){ - if(length(fecal_boli_videos_missing_in_qc)){ error.reporting <- c(error.reporting,"FECAL BOLI DATA PRESENT FOR VIDEOS NOT IN QC LOG") } - if(length(gait_videos_missing_in_qc)){ error.reporting <- c(error.reporting,"GAIT DATA PRESENT FOR VIDEOS NOT IN QC LOG") } - if(length(JABS_features_videos_missing_in_qc)){ error.reporting <- c(error.reporting,"JABS FEATURE DATA PRESENT FOR VIDEOS NOT IN QC LOG") } - if(length(morphometrics_videos_missing_in_qc)){ error.reporting <- c(error.reporting,"YOU ARE MISSING MORPHOMETRIC FEATURE DATA") } -} - -#Report to terminal if data is missing feature tables but in QC log -if(!is.character(publish_missing_data)){ - if(length(videos_with_missing_fecal_boli)){error.reporting <- c(error.reporting,"YOU ARE MISSING FECAL BOLI DATA") } - if(length(videos_with_JABS_features_missing)){ error.reporting <- c(error.reporting,"YOU ARE MISSING JABS FEATURE DATA") } - if(length(videos_with_morphometrics_features_missing)){ error.reporting <- c(error.reporting,"MORPHOMETRIC DATA PRESENT FOR VIDEOS NOT IN QC LOG") } -} - -#Report to terminal if there is duplicated data -if(!is.character(duplicated_data)){ error.reporting <- c(error.reporting,"YOU HAVE DUPLICATED DATA!") } - - -#Print out all errors after code done running -if(length(error.reporting) == 0){ - print("FINAL ERROR REPORT: NO ERRORS TO REPORT") -}else{ - print("FINAL ERROR REPORT:", ) - paste(error.reporting, collapse = "\n") -} +#!/usr/bin/env Rscript +# A script to clean NextFlow outputs for final processing +# Developed by Dr. Jake Beierle (don't forget the Dr., it's important) + +# ----Documentation---- +# See comprehensive documentation on the github repository +# https://github.com/jacobbeierle/JABS_nextflow_postprocess/tree/main + +# ====================== +# Set up / Libraries / Options +# ====================== + +library(tidyverse) +library(writexl) + +# ======================= +# Argument Configuration +# ====================== + +# Set your paths here: +input.dir <- "~/kumar-group/SING-grant/NextflowOutput/" +output.dir <- "~/kumar-group/SING-grant/Nextflow_postprocess" + +# Set your parameters here: +params <- list( + expected_length = 60*60*30 + 5*30, + max_tracklet_per_hour = 6, + max_missing_pose = 0.005, + max_missing_segmentation = 0.2, + max_missing_keypoint = 0.01, + fecal_boli_quantile_plotting = 0.05 +) + +# Optional: Override with YAML file (uncomment if you want to use this) +# yaml_vals <- yaml::read_yaml("path/to/your/config.yaml") +# params <- modifyList(params, yaml_vals) + +# Print final configuration +cat("=== QC CONFIGURATION ===\n") +cat("Input directory: ", input.dir, "\n") +cat("Output directory:", output.dir, "\n") +cat("QC Parameters:\n") +print(params) + +# ================== +# Create output directories +# ================== + +for (subdirectory in c("final_nextflow_feature_data", + "qc/nextflow_qc_logs", + "qc/missing_or_dup_data", + "qc/qc_figs")) { + dir.path = file.path(output.dir, subdirectory) + dir.create(dir.path, recursive = T, showWarnings = F) +} + +# ================ +# Process and Publish QC logs with success or failure annotated in a CSV +# ================ +# Read QC files in NextFlow_Output directory +qc_log <- list.files( + path = "~/kumar-group/SING-grant/NextflowOutput/", + pattern = "qc_batch_", + full.names = TRUE, + recursive = TRUE + ) %>% + read_csv(id = "QC_file", show_col_types = FALSE) + +# Record why QC failed for each video +qc_log <- qc_log %>% + mutate(passed_duration_QC = video_duration == params$expected_length, + passed_tracklet_QC = pose_tracklets < params$max_tracklet_per_hour * params$expected_length / 108000, + passed_segmentation_QC = seg_counts > (1 - params$max_missing_segmentation) * params$expected_length, + passed_pose_QC = pose_counts > (1 - params$max_missing_pose) * params$expected_length, + passed_kp_QC = missing_keypoint_frames < params$max_missing_keypoint * params$expected_length) + +# Apply thresholds defined above to create a separate 'failed QC' data frame +qc_log.failed <- qc_log %>% + filter(if_any(starts_with("passed_"), ~ !.x)) + +# Write final Nextflow QC files for review by a human +write.csv(qc_log, file.path(output.dir, "qc/nextflow_qc_logs/qc_all.csv"), row.names = FALSE) +write.csv(qc_log.failed, file.path(output.dir, "qc/nextflow_qc_logs/qc_failed.csv"), row.names = FALSE) + +# List of expected videos from the QC log files +expected_videos <- qc_log$video_name |> + gsub("_with_fecal_boli", "", x = _) |> + gsub("_filtered", "", x = _) |> + sub("^/", "", x = _) |> + unique() # Remove duplicate + +# ================== +# Helper functions for processing output data +# ================== +read_raw_data <- function(input_dir, pattern) { + ########################################################################## + # Read in multiple CSV files from a directory and harmonize the ID column + # + # Args: + # input_dir: path to the folder containing CSV files + # pattern: regex pattern to match file names + # + # Returns: + # A tibble with: + # - NetworkFilename as the first column + # - Cleaned NetworkFilename (no "_corrected", "_filtered", ".avi", leading "." or "/") + # + # Notes: + # - If NetworkFilename does not exist, the first column is used as ID + # - Useful for harmonizing output from different workflows before merging + ########################################################################## + + # Read in data from multiple csv files of the same patterns + raw_data <- list.files(path = input_dir, pattern = pattern, + recursive = T, full.names = T) |> + read_csv(show_col_types = FALSE) + + # Use NetworkFilename as the ID column (move it to the first column if not already so) + if ("NetworkFilename" %in% names(raw_data)) { + raw_data <- relocate(raw_data, NetworkFilename, .before = 1) + } else { + colnames(raw_data)[1] <- "NetworkFilename" + } + + # Clean and harmonize the NetworkFilename across data + raw_data$NetworkFilename <- raw_data$NetworkFilename |> + gsub("_corrected", "", x = _) |> + gsub("_filtered", "", x = _) |> + gsub("\\.avi$", "", x = _) |> + sub("^\\.", "", x = _) |> + sub("^/", "", x = _) + + return(raw_data) +} + +check_missing_and_dup <- function(expected_videos, data_df, corr_thres = 0.99) { + ########################################################################## + # Check for missing videos and duplicated rows in a dataset + # + # Args: + # expected_videos: character vector of expected NetworkFilename values + # data_df: dataframe containing a NetworkFilename column and data + # corr_thres: a number for how much correlated 2 rows are to be flagged + # + # Returns: + # A list with three elements: + # - missing_qc: videos present in expected_videos but missing in output_file + # - missing_output: videos present in output_file but missing in expected_videos + # - dup_data: rows in output_file that are duplicated (ignoring NetworkFilename), + # after rounding numeric columns to zero digits + # + # Notes: + # - Useful for QC of experimental datasets, e.g., fecal boli or gait data + # - Rounds numeric columns before checking for duplicates to account for minor differences + ########################################################################## + + video_missing_output <- setdiff(expected_videos, data_df$NetworkFilename) + video_missing_in_qc <- setdiff(data_df$NetworkFilename, expected_videos) + + # Check for rows with identical data (i.e. something went wrong in video recording) + dup_idx <- which(duplicated(data_df[, -1]) | duplicated(data_df[, -1], fromLast = TRUE)) + + # Check for rows with high correlation + corr_mat <- data_df %>% + dplyr::select(where(is.numeric)) %>% + scale() %>% t() %>% + cor(use = "pairwise.complete.obs") + + # correlated row pairs above threshold + row_pairs <- which(corr_mat > corr_thres & row(corr_mat) != col(corr_mat), arr.ind = TRUE) + cor_idx <- unique(c(row_pairs[,1], row_pairs[,2])) + + # UNION: rows that are duplicated OR highly correlated + all_idx <- sort(unique(c(dup_idx, cor_idx))) + + duplicated_rows <- data_df[all_idx,] + + return(list(missing_qc = video_missing_in_qc, + missing_output = video_missing_output, + dup_data = duplicated_rows)) +} + +# ================== +# Process fecal boli data +# ================== +# Concatenate all instances +fecal_boli.raw <- read_raw_data(input_dir = "~/kumar-group/SING-grant/NextflowOutput/", + pattern = "fecal_boli.csv") + +# Check for missing and duplicated data +fecal_boli.summary <- check_missing_and_dup(expected_videos = expected_videos, + data_df = fecal_boli.raw) +# Write out all raw, merged fecal boli counts +write.csv(fecal_boli.raw, file.path(output.dir, "final_nextflow_feature_data/fecal_boli_raw.csv"), row.names = FALSE) + +# ================ +# Fecal boli QC plots +# ================ +# Pivot longer to facilitate plotting for QC +fecal_boli.plot <- fecal_boli.raw |> + pivot_longer( + cols = !c(NetworkFilename, nextflow_version), + names_to = "min", + values_to = "fecal_boli", + values_drop_na = TRUE) |> + mutate(min = parse_number(min)) + +# Plot fecal boli QC measures +outFileNamePDF <- file.path(output.dir, "qc/qc_figs/fecal_boli_qc_figs.pdf") +pdf(outFileNamePDF, 6, 6) + +# Growth curve for all mice +ggplot(fecal_boli.plot, aes(min, fecal_boli, group = NetworkFilename, colour = NetworkFilename))+ + geom_line() + + labs(title = "Fecal boli growth, all mice") + + theme(legend.position = "none") + +# Plot mice with lowest fecal boli +fecal_boli.plot |> + summarise(across(fecal_boli, max), .by = NetworkFilename) |> + slice_min(fecal_boli, prop = params$fecal_boli_quantile_plotting) |> + select(NetworkFilename) |> + merge(fecal_boli.plot, by.x = "NetworkFilename") |> + ggplot(aes(min, fecal_boli, group = NetworkFilename, colour = NetworkFilename))+ + geom_line() + + labs(title = paste("Lowest ", params$fecal_boli_quantile_plotting*100, "% of fecal boli mice", sep = "")) + + theme(legend.position = "none") + +# Plot mice with highest fecal boli +fecal_boli.plot |> + summarise(across(fecal_boli, max), .by = NetworkFilename) |> + slice_max(fecal_boli, prop = params$fecal_boli_quantile_plotting) |> + select(NetworkFilename) |> + merge(fecal_boli.plot, by.x = "NetworkFilename") |> + ggplot(aes(min, fecal_boli, group = NetworkFilename, colour = NetworkFilename))+ + geom_line() + + labs(title = paste("Highest ", params$fecal_boli_quantile_plotting*100, "% of fecal boli mice", sep = "")) + + theme(legend.position = "none") + +# Histogram of final fecal boli count +fecal_boli.plot |> + arrange(desc(min)) |> + distinct(NetworkFilename, .keep_all = TRUE) |> + ggplot(aes(fecal_boli))+ + geom_histogram(binwidth = 1, boundary = 0)+ + labs(title = "Fecal boli highest bin, all mice") + + ylab("count") + +invisible(dev.off()) + +# =============== +# Process Gait Data +# =============== +# Import Gait Data +gait.raw <- read_raw_data(input_dir = "~/kumar-group/SING-grant/NextflowOutput/", + pattern = "gait.csv") + +video_level_metrics = c("Distance Traveled", "Body Length", "Speed", "Speed Variance", "nextflow_version") + +gait.wide_format <- gait.raw %>% + # Remove variance measures from speed bins with fewer than 3 strides + mutate(across(.cols = contains("Variance") & !all_of(video_level_metrics), + .fns = ~ ifelse(`Stride Count` < 3, NA, .x))) %>% + # Convert to wide format + pivot_wider(id_cols = c(NetworkFilename, all_of(video_level_metrics)), + names_from = `Speed Bin`, + values_from = -c(NetworkFilename, all_of(video_level_metrics), `Speed Bin`), + names_sep = ".") %>% + mutate(across(.cols = c(`Stride Count.10`, `Stride Count.15`, `Stride Count.20`, `Stride Count.25`), + .fns = ~ replace_na(.x, 0))) + +# Check for missing and duplicated data with gait +gait.summary <- check_missing_and_dup(expected_videos = expected_videos, + data_df = gait.wide_format) + +#output to final CSV +write.csv(gait.wide_format, file.path(output.dir, "final_nextflow_feature_data/gait_final.csv"), row.names = FALSE) + +# ================= +# Process JABS Feature Data +# ================= +JABS.features <- read_raw_data(input_dir = "~/kumar-group/SING-grant/NextflowOutput/", + pattern = "features.csv") + +# Check for missing data in JABS.features +JABS.features.summary <- check_missing_and_dup(expected_videos = expected_videos, + data_df = JABS.features) + +# Write the final csv +write.csv(JABS.features, file.path(output.dir, "final_nextflow_feature_data/JABS_features_final.csv"), row.names = FALSE) + +# ==================== +# Process morphometrics feature data +# ==================== +morpho.raw <- read_raw_data(input_dir = "~/kumar-group/SING-grant/NextflowOutput/", + pattern = "morphometrics.csv") + +# Check for missing and duplicated data in morphometric outputs +morpho.summary <- check_missing_and_dup(expected_videos = expected_videos, + data_df = morpho.raw) + +# ================= +# Report and output data for all warnings +# ================= +# Videos in QC but not in output +all_missing_data <- list("fecal_boli" = fecal_boli.summary$missing_output, + "gait" = gait.summary$missing_output, + "JABS_features" = JABS.features.summary$missing_output, + "Morphometrics" = morpho.summary$missing_output) + +# Videos in output but not in QC +videos_not_in_qc_report <- list("fecal_boli" = fecal_boli.summary$missing_qc, + "gait" = gait.summary$missing_qc, + "JABS_features" = JABS.features.summary$missing_qc, + "morphometrics" = morpho.summary$missing_qc) + +# Output data that is duplicated in the data frames +all_duplicated_data <- list("fecal_boli" = fecal_boli.summary$dup_data, + "gait" = gait.summary$dup_data, + "JABS_features" = JABS.features.summary$dup_data, + "morphometrics" = morpho.summary$dup_data) + +no_missing_output <- all(sapply(all_missing_data, length) == 0) +no_missing_qc <- all(sapply(videos_not_in_qc_report, length) == 0) +no_dups <- all(sapply(all_duplicated_data, nrow) == 0) + +# Summarize and write warnings +cat("=== FINAL ERROR REPORT ===\n") +if (no_missing_output && no_missing_qc && no_dups) { + cat("No errors to report\n") + # Create placeholder files + write.csv("No data missing", file.path(output.dir, "qc/missing_or_dup_data/missing_data.csv"), row.names = FALSE) + write.csv("No data missing", file.path(output.dir, "qc/missing_or_dup_data/videos_not_in_qc_report.csv"), row.names = FALSE) + write_xlsx(as.data.frame("No duplicated data"), file.path(output.dir, "qc/missing_or_dup_data/duplicated_data.xlsx"), row.names = FALSE) +} else { + + # Check for missing output data + if (no_missing_output) { + write.csv("No data missing", file.path(output.dir, "qc/missing_or_dup_data/missing_data.csv"), row.names = FALSE) + } else { + all_missing_data <- all_missing_data %>% + enframe(., name = "outputType", value = "video_path") %>% + unnest(cols = video_path) %>% + mutate(missing = TRUE) %>% + pivot_wider(id_cols = video_path, names_from = outputType, values_from = missing) + write.csv(all_missing_data, file.path(output.dir, "qc/missing_or_dup_data/missing_data.csv"), row.names = FALSE) + cat(paste("Missing", colnames(all_missing_data)[-1], "data"), sep = "\n") + } + + # Check for missing QC data + if (no_missing_qc) { + write.csv("No data missing", file.path(output.dir, "qc/missing_or_dup_data/videos_not_in_qc_report.csv"), row.names = FALSE) + } else { + videos_not_in_qc_report <- videos_not_in_qc_report %>% + enframe(., name = "outputType", value = "video_path") %>% + unnest(cols = video_path) %>% + mutate(missing = TRUE) %>% + pivot_wider(id_cols = video_path, names_from = outputType, values_from = missing) + write.csv(videos_not_in_qc_report, file.path(output.dir, "qc/missing_or_dup_data/videos_not_in_qc_report.csv"), row.names = FALSE) + cat(paste("Missing video in QC for", colnames(videos_not_in_qc_report)[-1], "data"), sep = "\n") + } + + # Check for duplicated data + if (no_dups) { + write_xlsx(as.data.frame("No duplicated data"), file.path(output.dir, "qc/missing_or_dup_data/duplicated_data.xlsx"), row.names = FALSE) + } else { + write_xlsx(all_duplicated_data, path = file.path(output.dir, "qc/missing_or_dup_data/duplicated_data.xlsx")) + cat(paste("Duplicated data for", names(all_duplicated_data)[sapply(all_duplicated_data, nrow) != 0]), sep = "\n") + } +} diff --git a/RBase.def b/RBase.def new file mode 100644 index 0000000..43a51cb --- /dev/null +++ b/RBase.def @@ -0,0 +1,27 @@ +Bootstrap: docker +From: rocker/r-ver:4.3.1 + +%files + renv.lock /opt/renv.lock + +%post + apt-get update + apt-get -y install \ + libcurl4-gnutls-dev \ + libssl-dev \ + libxml2-dev \ + libgit2-dev + + R -e "install.packages('renv', repos='https://cloud.r-project.org')" + + mkdir -p /opt/project + cp /opt/renv.lock /opt/project/ + + R -e "setwd('/opt/project'); renv::restore()" + +%environment + export LANG=en_US.UTF-8 + export LC_ALL=en_US.UTF-8 + +%runscript + exec R "$@" diff --git a/renv.lock b/renv.lock new file mode 100644 index 0000000..4e3b4b1 --- /dev/null +++ b/renv.lock @@ -0,0 +1,1956 @@ +{ + "R": { + "Version": "4.3.1", + "Repositories": [ + { + "Name": "CRAN", + "URL": "https://p3m.dev/cran/2023-10-30" + } + ] + }, + "Bioconductor": { + "Version": "3.18" + }, + "Packages": { + "BiocGenerics": { + "Package": "BiocGenerics", + "Version": "0.48.1", + "Source": "Bioconductor", + "Requirements": [ + "R", + "graphics", + "methods", + "stats", + "utils" + ], + "Hash": "e34278c65d7dffcc08f737bf0944ca9a" + }, + "BiocManager": { + "Package": "BiocManager", + "Version": "1.30.22", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "utils" + ], + "Hash": "d57e43105a1aa9cb54fdb4629725acb1" + }, + "BiocVersion": { + "Package": "BiocVersion", + "Version": "3.18.1", + "Source": "Bioconductor", + "Requirements": [ + "R" + ], + "Hash": "2ecaed86684f5fae76ed5530f9d29c4a" + }, + "ComplexHeatmap": { + "Package": "ComplexHeatmap", + "Version": "2.18.0", + "Source": "Bioconductor", + "Requirements": [ + "GetoptLong", + "GlobalOptions", + "IRanges", + "R", + "RColorBrewer", + "circlize", + "clue", + "codetools", + "colorspace", + "digest", + "doParallel", + "foreach", + "grDevices", + "graphics", + "grid", + "matrixStats", + "methods", + "png", + "stats" + ], + "Hash": "fd8d03c43e175afce12c1012711a05cc" + }, + "DBI": { + "Package": "DBI", + "Version": "1.1.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods" + ], + "Hash": "b2866e62bab9378c3cc9476a1954226b" + }, + "GetoptLong": { + "Package": "GetoptLong", + "Version": "1.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "GlobalOptions", + "R", + "crayon", + "methods", + "rjson" + ], + "Hash": "61fac01c73abf03ac72e88dc3952c1e3" + }, + "GlobalOptions": { + "Package": "GlobalOptions", + "Version": "0.1.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods", + "utils" + ], + "Hash": "c3f7b221e60c28f5f3533d74c6fef024" + }, + "IRanges": { + "Package": "IRanges", + "Version": "2.36.0", + "Source": "Bioconductor", + "Requirements": [ + "BiocGenerics", + "R", + "S4Vectors", + "methods", + "stats", + "stats4", + "utils" + ], + "Hash": "f98500eeb93e8a66ad65be955a848595" + }, + "MASS": { + "Package": "MASS", + "Version": "7.3-60", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "methods", + "stats", + "utils" + ], + "Hash": "a56a6365b3fa73293ea8d084be0d9bb0" + }, + "Matrix": { + "Package": "Matrix", + "Version": "1.6-1.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "grDevices", + "graphics", + "grid", + "lattice", + "methods", + "stats", + "utils" + ], + "Hash": "1a00d4828f33a9d690806e98bd17150c" + }, + "R6": { + "Package": "R6", + "Version": "2.5.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "470851b6d5d0ac559e9d01bb352b4021" + }, + "RColorBrewer": { + "Package": "RColorBrewer", + "Version": "1.1-3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "45f0398006e83a5b10b72a90663d8d8c" + }, + "Rcpp": { + "Package": "Rcpp", + "Version": "1.0.11", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "methods", + "utils" + ], + "Hash": "ae6cbbe1492f4de79c45fce06f967ce8" + }, + "S4Vectors": { + "Package": "S4Vectors", + "Version": "0.40.2", + "Source": "Bioconductor", + "Repository": "Bioconductor 3.18", + "Requirements": [ + "BiocGenerics", + "R", + "methods", + "stats", + "stats4", + "utils" + ], + "Hash": "1716e201f81ced0f456dd5ec85fe20f8" + }, + "askpass": { + "Package": "askpass", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "sys" + ], + "Hash": "cad6cf7f1d5f6e906700b9d3e718c796" + }, + "backports": { + "Package": "backports", + "Version": "1.4.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "c39fbec8a30d23e721980b8afb31984c" + }, + "base64enc": { + "Package": "base64enc", + "Version": "0.1-3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "543776ae6848fde2f48ff3816d0628bc" + }, + "bayestestR": { + "Package": "bayestestR", + "Version": "0.13.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "datawizard", + "graphics", + "insight", + "methods", + "stats", + "utils" + ], + "Hash": "61f643ea5ee9fe0e70ab0246340b3c2e" + }, + "bit": { + "Package": "bit", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "d242abec29412ce988848d0294b208fd" + }, + "bit64": { + "Package": "bit64", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bit", + "methods", + "stats", + "utils" + ], + "Hash": "9fe98599ca456d6552421db0d6772d8f" + }, + "blob": { + "Package": "blob", + "Version": "1.2.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "methods", + "rlang", + "vctrs" + ], + "Hash": "40415719b5a479b87949f3aa0aee737c" + }, + "broom": { + "Package": "broom", + "Version": "1.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "backports", + "dplyr", + "ellipsis", + "generics", + "glue", + "lifecycle", + "purrr", + "rlang", + "stringr", + "tibble", + "tidyr" + ], + "Hash": "fd25391c3c4f6ecf0fa95f1e6d15378c" + }, + "bslib": { + "Package": "bslib", + "Version": "0.5.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "base64enc", + "cachem", + "grDevices", + "htmltools", + "jquerylib", + "jsonlite", + "memoise", + "mime", + "rlang", + "sass" + ], + "Hash": "283015ddfbb9d7bf15ea9f0b5698f0d9" + }, + "cachem": { + "Package": "cachem", + "Version": "1.0.8", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "fastmap", + "rlang" + ], + "Hash": "c35768291560ce302c0a6589f92e837d" + }, + "callr": { + "Package": "callr", + "Version": "3.7.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "processx", + "utils" + ], + "Hash": "9b2191ede20fa29828139b9900922e51" + }, + "cellranger": { + "Package": "cellranger", + "Version": "1.1.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "rematch", + "tibble" + ], + "Hash": "f61dbaec772ccd2e17705c1e872e9e7c" + }, + "circlize": { + "Package": "circlize", + "Version": "0.4.15", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "GlobalOptions", + "R", + "colorspace", + "grDevices", + "graphics", + "grid", + "methods", + "shape", + "stats", + "utils" + ], + "Hash": "2bb47a2fe6ab009b1dcc5566d8c3a988" + }, + "cli": { + "Package": "cli", + "Version": "3.6.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "utils" + ], + "Hash": "89e6d8219950eac806ae0c489052048a" + }, + "clipr": { + "Package": "clipr", + "Version": "0.8.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "utils" + ], + "Hash": "3f038e5ac7f41d4ac41ce658c85e3042" + }, + "clue": { + "Package": "clue", + "Version": "0.3-65", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cluster", + "graphics", + "methods", + "stats" + ], + "Hash": "d6b53853800595408a776900bcc0c23f" + }, + "cluster": { + "Package": "cluster", + "Version": "2.1.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "stats", + "utils" + ], + "Hash": "5edbbabab6ce0bf7900a74fd4358628e" + }, + "codetools": { + "Package": "codetools", + "Version": "0.2-19", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "c089a619a7fae175d149d89164f8c7d8" + }, + "colorRamp2": { + "Package": "colorRamp2", + "Version": "0.1.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "colorspace", + "grDevices", + "methods", + "stats" + ], + "Hash": "9d3ab31de2c98399da370982a23733b6" + }, + "colorspace": { + "Package": "colorspace", + "Version": "2.1-0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "grDevices", + "graphics", + "methods", + "stats" + ], + "Hash": "f20c47fd52fae58b4e377c37bb8c335b" + }, + "conflicted": { + "Package": "conflicted", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "memoise", + "rlang" + ], + "Hash": "bb097fccb22d156624fd07cd2894ddb6" + }, + "correlation": { + "Package": "correlation", + "Version": "0.8.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bayestestR", + "datasets", + "datawizard", + "insight", + "parameters", + "stats" + ], + "Hash": "d8bd29a9abda6eed9aaab3ba5769f231" + }, + "cpp11": { + "Package": "cpp11", + "Version": "0.4.6", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "707fae4bbf73697ec8d85f9d7076c061" + }, + "crayon": { + "Package": "crayon", + "Version": "1.5.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "grDevices", + "methods", + "utils" + ], + "Hash": "e8a1e41acf02548751f45c718d55aa6a" + }, + "curl": { + "Package": "curl", + "Version": "5.1.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "9123f3ef96a2c1a93927d828b2fe7d4c" + }, + "data.table": { + "Package": "data.table", + "Version": "1.14.8", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods" + ], + "Hash": "b4c06e554f33344e044ccd7fdca750a9" + }, + "datawizard": { + "Package": "datawizard", + "Version": "0.9.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "insight", + "stats", + "utils" + ], + "Hash": "1706690277f29a2ee69d59483e21c5c6" + }, + "dbplyr": { + "Package": "dbplyr", + "Version": "2.4.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "DBI", + "R", + "R6", + "blob", + "cli", + "dplyr", + "glue", + "lifecycle", + "magrittr", + "methods", + "pillar", + "purrr", + "rlang", + "tibble", + "tidyr", + "tidyselect", + "utils", + "vctrs", + "withr" + ], + "Hash": "59351f28a81f0742720b85363c4fdd61" + }, + "digest": { + "Package": "digest", + "Version": "0.6.33", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "utils" + ], + "Hash": "b18a9cf3c003977b0cc49d5e76ebe48d" + }, + "doParallel": { + "Package": "doParallel", + "Version": "1.0.17", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "foreach", + "iterators", + "parallel", + "utils" + ], + "Hash": "451e5edf411987991ab6a5410c45011f" + }, + "dplyr": { + "Package": "dplyr", + "Version": "1.1.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "cli", + "generics", + "glue", + "lifecycle", + "magrittr", + "methods", + "pillar", + "rlang", + "tibble", + "tidyselect", + "utils", + "vctrs" + ], + "Hash": "e85ffbebaad5f70e1a2e2ef4302b4949" + }, + "dtplyr": { + "Package": "dtplyr", + "Version": "1.3.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "data.table", + "dplyr", + "glue", + "lifecycle", + "rlang", + "tibble", + "tidyselect", + "vctrs" + ], + "Hash": "54ed3ea01b11e81a86544faaecfef8e2" + }, + "ellipsis": { + "Package": "ellipsis", + "Version": "0.3.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "rlang" + ], + "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077" + }, + "evaluate": { + "Package": "evaluate", + "Version": "0.22", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods" + ], + "Hash": "66f39c7a21e03c4dcb2c2d21d738d603" + }, + "fansi": { + "Package": "fansi", + "Version": "1.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "grDevices", + "utils" + ], + "Hash": "3e8583a60163b4bc1a80016e63b9959e" + }, + "farver": { + "Package": "farver", + "Version": "2.1.1", + "Source": "Repository", + "Repository": "RSPM", + "Hash": "8106d78941f34855c440ddb946b8f7a5" + }, + "fastmap": { + "Package": "fastmap", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "RSPM", + "Hash": "f7736a18de97dea803bde0a2daaafb27" + }, + "fontawesome": { + "Package": "fontawesome", + "Version": "0.5.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "htmltools", + "rlang" + ], + "Hash": "c2efdd5f0bcd1ea861c2d4e2a883a67d" + }, + "forcats": { + "Package": "forcats", + "Version": "1.0.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "magrittr", + "rlang", + "tibble" + ], + "Hash": "1a0a9a3d5083d0d573c4214576f1e690" + }, + "foreach": { + "Package": "foreach", + "Version": "1.5.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "codetools", + "iterators", + "utils" + ], + "Hash": "618609b42c9406731ead03adf5379850" + }, + "fs": { + "Package": "fs", + "Version": "1.6.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods" + ], + "Hash": "47b5f30c720c23999b913a1a635cf0bb" + }, + "gargle": { + "Package": "gargle", + "Version": "1.5.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "fs", + "glue", + "httr", + "jsonlite", + "lifecycle", + "openssl", + "rappdirs", + "rlang", + "stats", + "utils", + "withr" + ], + "Hash": "fc0b272e5847c58cd5da9b20eedbd026" + }, + "generics": { + "Package": "generics", + "Version": "0.1.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods" + ], + "Hash": "15e9634c0fcd294799e9b2e929ed1b86" + }, + "getopt": { + "Package": "getopt", + "Version": "1.20.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "stats" + ], + "Hash": "ed33b16c6d24f7ced1d68877ac2509ee" + }, + "ggplot2": { + "Package": "ggplot2", + "Version": "3.4.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "MASS", + "R", + "cli", + "glue", + "grDevices", + "grid", + "gtable", + "isoband", + "lifecycle", + "mgcv", + "rlang", + "scales", + "stats", + "tibble", + "vctrs", + "withr" + ], + "Hash": "313d31eff2274ecf4c1d3581db7241f9" + }, + "ggrepel": { + "Package": "ggrepel", + "Version": "0.9.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "Rcpp", + "ggplot2", + "grid", + "rlang", + "scales", + "withr" + ], + "Hash": "e9839af82cc43fda486a638b68b439b2" + }, + "glue": { + "Package": "glue", + "Version": "1.6.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods" + ], + "Hash": "4f2596dfb05dac67b9dc558e5c6fba2e" + }, + "googledrive": { + "Package": "googledrive", + "Version": "2.1.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "gargle", + "glue", + "httr", + "jsonlite", + "lifecycle", + "magrittr", + "pillar", + "purrr", + "rlang", + "tibble", + "utils", + "uuid", + "vctrs", + "withr" + ], + "Hash": "e99641edef03e2a5e87f0a0b1fcc97f4" + }, + "googlesheets4": { + "Package": "googlesheets4", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cellranger", + "cli", + "curl", + "gargle", + "glue", + "googledrive", + "httr", + "ids", + "lifecycle", + "magrittr", + "methods", + "purrr", + "rematch2", + "rlang", + "tibble", + "utils", + "vctrs", + "withr" + ], + "Hash": "d6db1667059d027da730decdc214b959" + }, + "gtable": { + "Package": "gtable", + "Version": "0.3.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "grid", + "lifecycle", + "rlang" + ], + "Hash": "b29cf3031f49b04ab9c852c912547eef" + }, + "haven": { + "Package": "haven", + "Version": "2.5.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "cpp11", + "forcats", + "hms", + "lifecycle", + "methods", + "readr", + "rlang", + "tibble", + "tidyselect", + "vctrs" + ], + "Hash": "9b302fe352f9cfc5dcf0a4139af3a565" + }, + "highr": { + "Package": "highr", + "Version": "0.10", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "xfun" + ], + "Hash": "06230136b2d2b9ba5805e1963fa6e890" + }, + "hms": { + "Package": "hms", + "Version": "1.1.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "lifecycle", + "methods", + "pkgconfig", + "rlang", + "vctrs" + ], + "Hash": "b59377caa7ed00fa41808342002138f9" + }, + "htmltools": { + "Package": "htmltools", + "Version": "0.5.6.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "base64enc", + "digest", + "ellipsis", + "fastmap", + "grDevices", + "rlang", + "utils" + ], + "Hash": "1e12fe667316a76508898839ecfb2d00" + }, + "httr": { + "Package": "httr", + "Version": "1.4.7", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "curl", + "jsonlite", + "mime", + "openssl" + ], + "Hash": "ac107251d9d9fd72f0ca8049988f1d7f" + }, + "ids": { + "Package": "ids", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "openssl", + "uuid" + ], + "Hash": "99df65cfef20e525ed38c3d2577f7190" + }, + "insight": { + "Package": "insight", + "Version": "0.19.6", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods", + "stats", + "utils" + ], + "Hash": "cc21c0957774c1602ec3324a5a47d798" + }, + "isoband": { + "Package": "isoband", + "Version": "0.2.7", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "grid", + "utils" + ], + "Hash": "0080607b4a1a7b28979aecef976d8bc2" + }, + "iterators": { + "Package": "iterators", + "Version": "1.0.14", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "utils" + ], + "Hash": "8954069286b4b2b0d023d1b288dce978" + }, + "janitor": { + "Package": "janitor", + "Version": "2.2.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "dplyr", + "hms", + "lifecycle", + "lubridate", + "magrittr", + "purrr", + "rlang", + "snakecase", + "stringi", + "stringr", + "tidyr", + "tidyselect" + ], + "Hash": "5baae149f1082f466df9d1442ba7aa65" + }, + "jquerylib": { + "Package": "jquerylib", + "Version": "0.1.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "htmltools" + ], + "Hash": "5aab57a3bd297eee1c1d862735972182" + }, + "jsonlite": { + "Package": "jsonlite", + "Version": "1.8.7", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "methods" + ], + "Hash": "266a20443ca13c65688b2116d5220f76" + }, + "knitr": { + "Package": "knitr", + "Version": "1.44", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "evaluate", + "highr", + "methods", + "tools", + "xfun", + "yaml" + ], + "Hash": "60885b9f746c9dfaef110d070b5f7dc0" + }, + "labeling": { + "Package": "labeling", + "Version": "0.4.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "graphics", + "stats" + ], + "Hash": "b64ec208ac5bc1852b285f665d6368b3" + }, + "lattice": { + "Package": "lattice", + "Version": "0.21-8", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "grid", + "stats", + "utils" + ], + "Hash": "0b8a6d63c8770f02a8b5635f3c431e6b" + }, + "lifecycle": { + "Package": "lifecycle", + "Version": "1.0.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "rlang" + ], + "Hash": "001cecbeac1cff9301bdc3775ee46a86" + }, + "lubridate": { + "Package": "lubridate", + "Version": "1.9.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "generics", + "methods", + "timechange" + ], + "Hash": "680ad542fbcf801442c83a6ac5a2126c" + }, + "magrittr": { + "Package": "magrittr", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "7ce2733a9826b3aeb1775d56fd305472" + }, + "matrixStats": { + "Package": "matrixStats", + "Version": "1.0.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "9143629fd64335aac6a6250d1c1ed82a" + }, + "memoise": { + "Package": "memoise", + "Version": "2.0.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "cachem", + "rlang" + ], + "Hash": "e2817ccf4a065c5d9d7f2cfbe7c1d78c" + }, + "mgcv": { + "Package": "mgcv", + "Version": "1.8-42", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "Matrix", + "R", + "graphics", + "methods", + "nlme", + "splines", + "stats", + "utils" + ], + "Hash": "3460beba7ccc8946249ba35327ba902a" + }, + "mime": { + "Package": "mime", + "Version": "0.12", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "tools" + ], + "Hash": "18e9c28c1d3ca1560ce30658b22ce104" + }, + "modelr": { + "Package": "modelr", + "Version": "0.1.11", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "broom", + "magrittr", + "purrr", + "rlang", + "tibble", + "tidyr", + "tidyselect", + "vctrs" + ], + "Hash": "4f50122dc256b1b6996a4703fecea821" + }, + "munsell": { + "Package": "munsell", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "colorspace", + "methods" + ], + "Hash": "6dfe8bf774944bd5595785e3229d8771" + }, + "nlme": { + "Package": "nlme", + "Version": "3.1-162", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "graphics", + "lattice", + "stats", + "utils" + ], + "Hash": "0984ce8da8da9ead8643c5cbbb60f83e" + }, + "openssl": { + "Package": "openssl", + "Version": "2.1.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "askpass" + ], + "Hash": "2a0dc8c6adfb6f032e4d4af82d258ab5" + }, + "optparse": { + "Package": "optparse", + "Version": "1.7.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "getopt", + "methods" + ], + "Hash": "aa4a7717b5760a769c7fd3d34614f2a2" + }, + "parameters": { + "Package": "parameters", + "Version": "0.21.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bayestestR", + "datawizard", + "graphics", + "insight", + "methods", + "stats", + "utils" + ], + "Hash": "7bca0c1c6f188b195a5f380b8e73b91a" + }, + "pillar": { + "Package": "pillar", + "Version": "1.9.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "cli", + "fansi", + "glue", + "lifecycle", + "rlang", + "utf8", + "utils", + "vctrs" + ], + "Hash": "15da5a8412f317beeee6175fbc76f4bb" + }, + "pkgconfig": { + "Package": "pkgconfig", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "utils" + ], + "Hash": "01f28d4278f15c76cddbea05899c5d6f" + }, + "png": { + "Package": "png", + "Version": "0.1-8", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "bd54ba8a0a5faded999a7aab6e46b374" + }, + "ppcor": { + "Package": "ppcor", + "Version": "1.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "MASS", + "R" + ], + "Hash": "0b26c0c84f22515249dd7915f4214d32" + }, + "prettyunits": { + "Package": "prettyunits", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "6b01fc98b1e86c4f705ce9dcfd2f57c7" + }, + "processx": { + "Package": "processx", + "Version": "3.8.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "ps", + "utils" + ], + "Hash": "3efbd8ac1be0296a46c55387aeace0f3" + }, + "progress": { + "Package": "progress", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R6", + "crayon", + "hms", + "prettyunits" + ], + "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061" + }, + "ps": { + "Package": "ps", + "Version": "1.7.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "utils" + ], + "Hash": "709d852d33178db54b17c722e5b1e594" + }, + "purrr": { + "Package": "purrr", + "Version": "1.0.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "lifecycle", + "magrittr", + "rlang", + "vctrs" + ], + "Hash": "1cba04a4e9414bdefc9dcaa99649a8dc" + }, + "ragg": { + "Package": "ragg", + "Version": "1.2.6", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "systemfonts", + "textshaping" + ], + "Hash": "6ba2fa8740abdc2cc148407836509901" + }, + "rappdirs": { + "Package": "rappdirs", + "Version": "0.3.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "5e3c5dc0b071b21fa128676560dbe94d" + }, + "readr": { + "Package": "readr", + "Version": "2.1.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "cli", + "clipr", + "cpp11", + "crayon", + "hms", + "lifecycle", + "methods", + "rlang", + "tibble", + "tzdb", + "utils", + "vroom" + ], + "Hash": "b5047343b3825f37ad9d3b5d89aa1078" + }, + "readxl": { + "Package": "readxl", + "Version": "1.4.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cellranger", + "cpp11", + "progress", + "tibble", + "utils" + ], + "Hash": "8cf9c239b96df1bbb133b74aef77ad0a" + }, + "rematch": { + "Package": "rematch", + "Version": "2.0.0", + "Source": "Repository", + "Repository": "RSPM", + "Hash": "cbff1b666c6fa6d21202f07e2318d4f1" + }, + "rematch2": { + "Package": "rematch2", + "Version": "2.1.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "tibble" + ], + "Hash": "76c9e04c712a05848ae7a23d2f170a40" + }, + "renv": { + "Package": "renv", + "Version": "1.0.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "utils" + ], + "Hash": "41b847654f567341725473431dd0d5ab" + }, + "reprex": { + "Package": "reprex", + "Version": "2.0.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "callr", + "cli", + "clipr", + "fs", + "glue", + "knitr", + "lifecycle", + "rlang", + "rmarkdown", + "rstudioapi", + "utils", + "withr" + ], + "Hash": "d66fe009d4c20b7ab1927eb405db9ee2" + }, + "rjson": { + "Package": "rjson", + "Version": "0.2.21", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "f9da75e6444e95a1baf8ca24909d63b9" + }, + "rlang": { + "Package": "rlang", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "utils" + ], + "Hash": "a85c767b55f0bf9b7ad16c6d7baee5bb" + }, + "rmarkdown": { + "Package": "rmarkdown", + "Version": "2.25", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bslib", + "evaluate", + "fontawesome", + "htmltools", + "jquerylib", + "jsonlite", + "knitr", + "methods", + "stringr", + "tinytex", + "tools", + "utils", + "xfun", + "yaml" + ], + "Hash": "d65e35823c817f09f4de424fcdfa812a" + }, + "rstudioapi": { + "Package": "rstudioapi", + "Version": "0.15.0", + "Source": "Repository", + "Repository": "RSPM", + "Hash": "5564500e25cffad9e22244ced1379887" + }, + "rvest": { + "Package": "rvest", + "Version": "1.0.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "httr", + "lifecycle", + "magrittr", + "rlang", + "selectr", + "tibble", + "withr", + "xml2" + ], + "Hash": "a4a5ac819a467808c60e36e92ddf195e" + }, + "sass": { + "Package": "sass", + "Version": "0.4.7", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R6", + "fs", + "htmltools", + "rappdirs", + "rlang" + ], + "Hash": "6bd4d33b50ff927191ec9acbf52fd056" + }, + "scales": { + "Package": "scales", + "Version": "1.2.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "RColorBrewer", + "farver", + "labeling", + "lifecycle", + "munsell", + "rlang", + "viridisLite" + ], + "Hash": "906cb23d2f1c5680b8ce439b44c6fa63" + }, + "selectr": { + "Package": "selectr", + "Version": "0.4-2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "methods", + "stringr" + ], + "Hash": "3838071b66e0c566d55cc26bd6e27bf4" + }, + "shape": { + "Package": "shape", + "Version": "1.4.6", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "grDevices", + "graphics", + "stats" + ], + "Hash": "9067f962730f58b14d8ae54ca885509f" + }, + "snakecase": { + "Package": "snakecase", + "Version": "0.11.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "stringi", + "stringr" + ], + "Hash": "58767e44739b76965332e8a4fe3f91f1" + }, + "stringi": { + "Package": "stringi", + "Version": "1.7.12", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "stats", + "tools", + "utils" + ], + "Hash": "ca8bd84263c77310739d2cf64d84d7c9" + }, + "stringr": { + "Package": "stringr", + "Version": "1.5.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "magrittr", + "rlang", + "stringi", + "vctrs" + ], + "Hash": "671a4d384ae9d32fc47a14e98bfa3dc8" + }, + "sys": { + "Package": "sys", + "Version": "3.4.2", + "Source": "Repository", + "Repository": "RSPM", + "Hash": "3a1be13d68d47a8cd0bfd74739ca1555" + }, + "systemfonts": { + "Package": "systemfonts", + "Version": "1.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "15b594369e70b975ba9f064295983499" + }, + "textshaping": { + "Package": "textshaping", + "Version": "0.3.7", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11", + "systemfonts" + ], + "Hash": "997aac9ad649e0ef3b97f96cddd5622b" + }, + "tibble": { + "Package": "tibble", + "Version": "3.2.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "fansi", + "lifecycle", + "magrittr", + "methods", + "pillar", + "pkgconfig", + "rlang", + "utils", + "vctrs" + ], + "Hash": "a84e2cc86d07289b3b6f5069df7a004c" + }, + "tidyr": { + "Package": "tidyr", + "Version": "1.3.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "cpp11", + "dplyr", + "glue", + "lifecycle", + "magrittr", + "purrr", + "rlang", + "stringr", + "tibble", + "tidyselect", + "utils", + "vctrs" + ], + "Hash": "e47debdc7ce599b070c8e78e8ac0cfcf" + }, + "tidyselect": { + "Package": "tidyselect", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "rlang", + "vctrs", + "withr" + ], + "Hash": "79540e5fcd9e0435af547d885f184fd5" + }, + "tidyverse": { + "Package": "tidyverse", + "Version": "2.0.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "broom", + "cli", + "conflicted", + "dbplyr", + "dplyr", + "dtplyr", + "forcats", + "ggplot2", + "googledrive", + "googlesheets4", + "haven", + "hms", + "httr", + "jsonlite", + "lubridate", + "magrittr", + "modelr", + "pillar", + "purrr", + "ragg", + "readr", + "readxl", + "reprex", + "rlang", + "rstudioapi", + "rvest", + "stringr", + "tibble", + "tidyr", + "xml2" + ], + "Hash": "c328568cd14ea89a83bd4ca7f54ae07e" + }, + "timechange": { + "Package": "timechange", + "Version": "0.2.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "8548b44f79a35ba1791308b61e6012d7" + }, + "tinytex": { + "Package": "tinytex", + "Version": "0.48", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "xfun" + ], + "Hash": "8f96d229b7311beb32b94cf413b13f84" + }, + "tzdb": { + "Package": "tzdb", + "Version": "0.4.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "f561504ec2897f4d46f0c7657e488ae1" + }, + "utf8": { + "Package": "utf8", + "Version": "1.2.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "62b65c52671e6665f803ff02954446e9" + }, + "uuid": { + "Package": "uuid", + "Version": "1.1-1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "3d78edfb977a69fc7a0341bee25e163f" + }, + "vctrs": { + "Package": "vctrs", + "Version": "0.6.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "rlang" + ], + "Hash": "266c1ca411266ba8f365fcc726444b87" + }, + "viridisLite": { + "Package": "viridisLite", + "Version": "0.4.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "c826c7c4241b6fc89ff55aaea3fa7491" + }, + "vroom": { + "Package": "vroom", + "Version": "1.6.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bit64", + "cli", + "cpp11", + "crayon", + "glue", + "hms", + "lifecycle", + "methods", + "progress", + "rlang", + "stats", + "tibble", + "tidyselect", + "tzdb", + "vctrs", + "withr" + ], + "Hash": "9db52c1656cf19c124f93124ea57f0fd" + }, + "withr": { + "Package": "withr", + "Version": "2.5.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "grDevices", + "graphics", + "stats" + ], + "Hash": "d77c6f74be05c33164e33fbc85540cae" + }, + "writexl": { + "Package": "writexl", + "Version": "1.4.2", + "Source": "Repository", + "Repository": "RSPM", + "Hash": "43da939eaf6681c88eba977b9012dad9" + }, + "xfun": { + "Package": "xfun", + "Version": "0.40", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "stats", + "tools" + ], + "Hash": "be07d23211245fc7d4209f54c4e4ffc8" + }, + "xml2": { + "Package": "xml2", + "Version": "1.3.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods" + ], + "Hash": "6c40e5cfcc6aefd88110666e18c31f40" + }, + "yaml": { + "Package": "yaml", + "Version": "2.3.7", + "Source": "Repository", + "Repository": "RSPM", + "Hash": "0d0056cc5383fbc240ccd0cb584bf436" + } + } +} diff --git a/renv/.gitignore b/renv/.gitignore new file mode 100644 index 0000000..0ec0cbb --- /dev/null +++ b/renv/.gitignore @@ -0,0 +1,7 @@ +library/ +local/ +cellar/ +lock/ +python/ +sandbox/ +staging/ diff --git a/renv/activate.R b/renv/activate.R new file mode 100644 index 0000000..cb5401f --- /dev/null +++ b/renv/activate.R @@ -0,0 +1,1180 @@ + +local({ + + # the requested version of renv + version <- "1.0.3" + attr(version, "sha") <- NULL + + # the project directory + project <- getwd() + + # use start-up diagnostics if enabled + diagnostics <- Sys.getenv("RENV_STARTUP_DIAGNOSTICS", unset = "FALSE") + if (diagnostics) { + start <- Sys.time() + profile <- tempfile("renv-startup-", fileext = ".Rprof") + utils::Rprof(profile) + on.exit({ + utils::Rprof(NULL) + elapsed <- signif(difftime(Sys.time(), start, units = "auto"), digits = 2L) + writeLines(sprintf("- renv took %s to run the autoloader.", format(elapsed))) + writeLines(sprintf("- Profile: %s", profile)) + print(utils::summaryRprof(profile)) + }, add = TRUE) + } + + # figure out whether the autoloader is enabled + enabled <- local({ + + # first, check config option + override <- getOption("renv.config.autoloader.enabled") + if (!is.null(override)) + return(override) + + # next, check environment variables + # TODO: prefer using the configuration one in the future + envvars <- c( + "RENV_CONFIG_AUTOLOADER_ENABLED", + "RENV_AUTOLOADER_ENABLED", + "RENV_ACTIVATE_PROJECT" + ) + + for (envvar in envvars) { + envval <- Sys.getenv(envvar, unset = NA) + if (!is.na(envval)) + return(tolower(envval) %in% c("true", "t", "1")) + } + + # enable by default + TRUE + + }) + + if (!enabled) + return(FALSE) + + # avoid recursion + if (identical(getOption("renv.autoloader.running"), TRUE)) { + warning("ignoring recursive attempt to run renv autoloader") + return(invisible(TRUE)) + } + + # signal that we're loading renv during R startup + options(renv.autoloader.running = TRUE) + on.exit(options(renv.autoloader.running = NULL), add = TRUE) + + # signal that we've consented to use renv + options(renv.consent = TRUE) + + # load the 'utils' package eagerly -- this ensures that renv shims, which + # mask 'utils' packages, will come first on the search path + library(utils, lib.loc = .Library) + + # unload renv if it's already been loaded + if ("renv" %in% loadedNamespaces()) + unloadNamespace("renv") + + # load bootstrap tools + `%||%` <- function(x, y) { + if (is.null(x)) y else x + } + + catf <- function(fmt, ..., appendLF = TRUE) { + + quiet <- getOption("renv.bootstrap.quiet", default = FALSE) + if (quiet) + return(invisible()) + + msg <- sprintf(fmt, ...) + cat(msg, file = stdout(), sep = if (appendLF) "\n" else "") + + invisible(msg) + + } + + header <- function(label, + ..., + prefix = "#", + suffix = "-", + n = min(getOption("width"), 78)) + { + label <- sprintf(label, ...) + n <- max(n - nchar(label) - nchar(prefix) - 2L, 8L) + if (n <= 0) + return(paste(prefix, label)) + + tail <- paste(rep.int(suffix, n), collapse = "") + paste0(prefix, " ", label, " ", tail) + + } + + startswith <- function(string, prefix) { + substring(string, 1, nchar(prefix)) == prefix + } + + bootstrap <- function(version, library) { + + friendly <- renv_bootstrap_version_friendly(version) + section <- header(sprintf("Bootstrapping renv %s", friendly)) + catf(section) + + # attempt to download renv + catf("- Downloading renv ... ", appendLF = FALSE) + withCallingHandlers( + tarball <- renv_bootstrap_download(version), + error = function(err) { + catf("FAILED") + stop("failed to download:\n", conditionMessage(err)) + } + ) + catf("OK") + on.exit(unlink(tarball), add = TRUE) + + # now attempt to install + catf("- Installing renv ... ", appendLF = FALSE) + withCallingHandlers( + status <- renv_bootstrap_install(version, tarball, library), + error = function(err) { + catf("FAILED") + stop("failed to install:\n", conditionMessage(err)) + } + ) + catf("OK") + + # add empty line to break up bootstrapping from normal output + catf("") + + return(invisible()) + } + + renv_bootstrap_tests_running <- function() { + getOption("renv.tests.running", default = FALSE) + } + + renv_bootstrap_repos <- function() { + + # get CRAN repository + cran <- getOption("renv.repos.cran", "https://cloud.r-project.org") + + # check for repos override + repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA) + if (!is.na(repos)) { + + # check for RSPM; if set, use a fallback repository for renv + rspm <- Sys.getenv("RSPM", unset = NA) + if (identical(rspm, repos)) + repos <- c(RSPM = rspm, CRAN = cran) + + return(repos) + + } + + # check for lockfile repositories + repos <- tryCatch(renv_bootstrap_repos_lockfile(), error = identity) + if (!inherits(repos, "error") && length(repos)) + return(repos) + + # retrieve current repos + repos <- getOption("repos") + + # ensure @CRAN@ entries are resolved + repos[repos == "@CRAN@"] <- cran + + # add in renv.bootstrap.repos if set + default <- c(FALLBACK = "https://cloud.r-project.org") + extra <- getOption("renv.bootstrap.repos", default = default) + repos <- c(repos, extra) + + # remove duplicates that might've snuck in + dupes <- duplicated(repos) | duplicated(names(repos)) + repos[!dupes] + + } + + renv_bootstrap_repos_lockfile <- function() { + + lockpath <- Sys.getenv("RENV_PATHS_LOCKFILE", unset = "renv.lock") + if (!file.exists(lockpath)) + return(NULL) + + lockfile <- tryCatch(renv_json_read(lockpath), error = identity) + if (inherits(lockfile, "error")) { + warning(lockfile) + return(NULL) + } + + repos <- lockfile$R$Repositories + if (length(repos) == 0) + return(NULL) + + keys <- vapply(repos, `[[`, "Name", FUN.VALUE = character(1)) + vals <- vapply(repos, `[[`, "URL", FUN.VALUE = character(1)) + names(vals) <- keys + + return(vals) + + } + + renv_bootstrap_download <- function(version) { + + sha <- attr(version, "sha", exact = TRUE) + + methods <- if (!is.null(sha)) { + + # attempting to bootstrap a development version of renv + c( + function() renv_bootstrap_download_tarball(sha), + function() renv_bootstrap_download_github(sha) + ) + + } else { + + # attempting to bootstrap a release version of renv + c( + function() renv_bootstrap_download_tarball(version), + function() renv_bootstrap_download_cran_latest(version), + function() renv_bootstrap_download_cran_archive(version) + ) + + } + + for (method in methods) { + path <- tryCatch(method(), error = identity) + if (is.character(path) && file.exists(path)) + return(path) + } + + stop("All download methods failed") + + } + + renv_bootstrap_download_impl <- function(url, destfile) { + + mode <- "wb" + + # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715 + fixup <- + Sys.info()[["sysname"]] == "Windows" && + substring(url, 1L, 5L) == "file:" + + if (fixup) + mode <- "w+b" + + args <- list( + url = url, + destfile = destfile, + mode = mode, + quiet = TRUE + ) + + if ("headers" %in% names(formals(utils::download.file))) + args$headers <- renv_bootstrap_download_custom_headers(url) + + do.call(utils::download.file, args) + + } + + renv_bootstrap_download_custom_headers <- function(url) { + + headers <- getOption("renv.download.headers") + if (is.null(headers)) + return(character()) + + if (!is.function(headers)) + stopf("'renv.download.headers' is not a function") + + headers <- headers(url) + if (length(headers) == 0L) + return(character()) + + if (is.list(headers)) + headers <- unlist(headers, recursive = FALSE, use.names = TRUE) + + ok <- + is.character(headers) && + is.character(names(headers)) && + all(nzchar(names(headers))) + + if (!ok) + stop("invocation of 'renv.download.headers' did not return a named character vector") + + headers + + } + + renv_bootstrap_download_cran_latest <- function(version) { + + spec <- renv_bootstrap_download_cran_latest_find(version) + type <- spec$type + repos <- spec$repos + + baseurl <- utils::contrib.url(repos = repos, type = type) + ext <- if (identical(type, "source")) + ".tar.gz" + else if (Sys.info()[["sysname"]] == "Windows") + ".zip" + else + ".tgz" + name <- sprintf("renv_%s%s", version, ext) + url <- paste(baseurl, name, sep = "/") + + destfile <- file.path(tempdir(), name) + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (inherits(status, "condition")) + return(FALSE) + + # report success and return + destfile + + } + + renv_bootstrap_download_cran_latest_find <- function(version) { + + # check whether binaries are supported on this system + binary <- + getOption("renv.bootstrap.binary", default = TRUE) && + !identical(.Platform$pkgType, "source") && + !identical(getOption("pkgType"), "source") && + Sys.info()[["sysname"]] %in% c("Darwin", "Windows") + + types <- c(if (binary) "binary", "source") + + # iterate over types + repositories + for (type in types) { + for (repos in renv_bootstrap_repos()) { + + # retrieve package database + db <- tryCatch( + as.data.frame( + utils::available.packages(type = type, repos = repos), + stringsAsFactors = FALSE + ), + error = identity + ) + + if (inherits(db, "error")) + next + + # check for compatible entry + entry <- db[db$Package %in% "renv" & db$Version %in% version, ] + if (nrow(entry) == 0) + next + + # found it; return spec to caller + spec <- list(entry = entry, type = type, repos = repos) + return(spec) + + } + } + + # if we got here, we failed to find renv + fmt <- "renv %s is not available from your declared package repositories" + stop(sprintf(fmt, version)) + + } + + renv_bootstrap_download_cran_archive <- function(version) { + + name <- sprintf("renv_%s.tar.gz", version) + repos <- renv_bootstrap_repos() + urls <- file.path(repos, "src/contrib/Archive/renv", name) + destfile <- file.path(tempdir(), name) + + for (url in urls) { + + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (identical(status, 0L)) + return(destfile) + + } + + return(FALSE) + + } + + renv_bootstrap_download_tarball <- function(version) { + + # if the user has provided the path to a tarball via + # an environment variable, then use it + tarball <- Sys.getenv("RENV_BOOTSTRAP_TARBALL", unset = NA) + if (is.na(tarball)) + return() + + # allow directories + if (dir.exists(tarball)) { + name <- sprintf("renv_%s.tar.gz", version) + tarball <- file.path(tarball, name) + } + + # bail if it doesn't exist + if (!file.exists(tarball)) { + + # let the user know we weren't able to honour their request + fmt <- "- RENV_BOOTSTRAP_TARBALL is set (%s) but does not exist." + msg <- sprintf(fmt, tarball) + warning(msg) + + # bail + return() + + } + + catf("- Using local tarball '%s'.", tarball) + tarball + + } + + renv_bootstrap_download_github <- function(version) { + + enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") + if (!identical(enabled, "TRUE")) + return(FALSE) + + # prepare download options + pat <- Sys.getenv("GITHUB_PAT") + if (nzchar(Sys.which("curl")) && nzchar(pat)) { + fmt <- "--location --fail --header \"Authorization: token %s\"" + extra <- sprintf(fmt, pat) + saved <- options("download.file.method", "download.file.extra") + options(download.file.method = "curl", download.file.extra = extra) + on.exit(do.call(base::options, saved), add = TRUE) + } else if (nzchar(Sys.which("wget")) && nzchar(pat)) { + fmt <- "--header=\"Authorization: token %s\"" + extra <- sprintf(fmt, pat) + saved <- options("download.file.method", "download.file.extra") + options(download.file.method = "wget", download.file.extra = extra) + on.exit(do.call(base::options, saved), add = TRUE) + } + + url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version) + name <- sprintf("renv_%s.tar.gz", version) + destfile <- file.path(tempdir(), name) + + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (!identical(status, 0L)) + return(FALSE) + + renv_bootstrap_download_augment(destfile) + + return(destfile) + + } + + # Add Sha to DESCRIPTION. This is stop gap until #890, after which we + # can use renv::install() to fully capture metadata. + renv_bootstrap_download_augment <- function(destfile) { + sha <- renv_bootstrap_git_extract_sha1_tar(destfile) + if (is.null(sha)) { + return() + } + + # Untar + tempdir <- tempfile("renv-github-") + on.exit(unlink(tempdir, recursive = TRUE), add = TRUE) + untar(destfile, exdir = tempdir) + pkgdir <- dir(tempdir, full.names = TRUE)[[1]] + + # Modify description + desc_path <- file.path(pkgdir, "DESCRIPTION") + desc_lines <- readLines(desc_path) + remotes_fields <- c( + "RemoteType: github", + "RemoteHost: api.github.com", + "RemoteRepo: renv", + "RemoteUsername: rstudio", + "RemotePkgRef: rstudio/renv", + paste("RemoteRef: ", sha), + paste("RemoteSha: ", sha) + ) + writeLines(c(desc_lines[desc_lines != ""], remotes_fields), con = desc_path) + + # Re-tar + local({ + old <- setwd(tempdir) + on.exit(setwd(old), add = TRUE) + + tar(destfile, compression = "gzip") + }) + invisible() + } + + # Extract the commit hash from a git archive. Git archives include the SHA1 + # hash as the comment field of the tarball pax extended header + # (see https://www.kernel.org/pub/software/scm/git/docs/git-archive.html) + # For GitHub archives this should be the first header after the default one + # (512 byte) header. + renv_bootstrap_git_extract_sha1_tar <- function(bundle) { + + # open the bundle for reading + # We use gzcon for everything because (from ?gzcon) + # > Reading from a connection which does not supply a 'gzip' magic + # > header is equivalent to reading from the original connection + conn <- gzcon(file(bundle, open = "rb", raw = TRUE)) + on.exit(close(conn)) + + # The default pax header is 512 bytes long and the first pax extended header + # with the comment should be 51 bytes long + # `52 comment=` (11 chars) + 40 byte SHA1 hash + len <- 0x200 + 0x33 + res <- rawToChar(readBin(conn, "raw", n = len)[0x201:len]) + + if (grepl("^52 comment=", res)) { + sub("52 comment=", "", res) + } else { + NULL + } + } + + renv_bootstrap_install <- function(version, tarball, library) { + + # attempt to install it into project library + dir.create(library, showWarnings = FALSE, recursive = TRUE) + output <- renv_bootstrap_install_impl(library, tarball) + + # check for successful install + status <- attr(output, "status") + if (is.null(status) || identical(status, 0L)) + return(status) + + # an error occurred; report it + header <- "installation of renv failed" + lines <- paste(rep.int("=", nchar(header)), collapse = "") + text <- paste(c(header, lines, output), collapse = "\n") + stop(text) + + } + + renv_bootstrap_install_impl <- function(library, tarball) { + + # invoke using system2 so we can capture and report output + bin <- R.home("bin") + exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R" + R <- file.path(bin, exe) + + args <- c( + "--vanilla", "CMD", "INSTALL", "--no-multiarch", + "-l", shQuote(path.expand(library)), + shQuote(path.expand(tarball)) + ) + + system2(R, args, stdout = TRUE, stderr = TRUE) + + } + + renv_bootstrap_platform_prefix <- function() { + + # construct version prefix + version <- paste(R.version$major, R.version$minor, sep = ".") + prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-") + + # include SVN revision for development versions of R + # (to avoid sharing platform-specific artefacts with released versions of R) + devel <- + identical(R.version[["status"]], "Under development (unstable)") || + identical(R.version[["nickname"]], "Unsuffered Consequences") + + if (devel) + prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r") + + # build list of path components + components <- c(prefix, R.version$platform) + + # include prefix if provided by user + prefix <- renv_bootstrap_platform_prefix_impl() + if (!is.na(prefix) && nzchar(prefix)) + components <- c(prefix, components) + + # build prefix + paste(components, collapse = "/") + + } + + renv_bootstrap_platform_prefix_impl <- function() { + + # if an explicit prefix has been supplied, use it + prefix <- Sys.getenv("RENV_PATHS_PREFIX", unset = NA) + if (!is.na(prefix)) + return(prefix) + + # if the user has requested an automatic prefix, generate it + auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA) + if (auto %in% c("TRUE", "True", "true", "1")) + return(renv_bootstrap_platform_prefix_auto()) + + # empty string on failure + "" + + } + + renv_bootstrap_platform_prefix_auto <- function() { + + prefix <- tryCatch(renv_bootstrap_platform_os(), error = identity) + if (inherits(prefix, "error") || prefix %in% "unknown") { + + msg <- paste( + "failed to infer current operating system", + "please file a bug report at https://github.com/rstudio/renv/issues", + sep = "; " + ) + + warning(msg) + + } + + prefix + + } + + renv_bootstrap_platform_os <- function() { + + sysinfo <- Sys.info() + sysname <- sysinfo[["sysname"]] + + # handle Windows + macOS up front + if (sysname == "Windows") + return("windows") + else if (sysname == "Darwin") + return("macos") + + # check for os-release files + for (file in c("/etc/os-release", "/usr/lib/os-release")) + if (file.exists(file)) + return(renv_bootstrap_platform_os_via_os_release(file, sysinfo)) + + # check for redhat-release files + if (file.exists("/etc/redhat-release")) + return(renv_bootstrap_platform_os_via_redhat_release()) + + "unknown" + + } + + renv_bootstrap_platform_os_via_os_release <- function(file, sysinfo) { + + # read /etc/os-release + release <- utils::read.table( + file = file, + sep = "=", + quote = c("\"", "'"), + col.names = c("Key", "Value"), + comment.char = "#", + stringsAsFactors = FALSE + ) + + vars <- as.list(release$Value) + names(vars) <- release$Key + + # get os name + os <- tolower(sysinfo[["sysname"]]) + + # read id + id <- "unknown" + for (field in c("ID", "ID_LIKE")) { + if (field %in% names(vars) && nzchar(vars[[field]])) { + id <- vars[[field]] + break + } + } + + # read version + version <- "unknown" + for (field in c("UBUNTU_CODENAME", "VERSION_CODENAME", "VERSION_ID", "BUILD_ID")) { + if (field %in% names(vars) && nzchar(vars[[field]])) { + version <- vars[[field]] + break + } + } + + # join together + paste(c(os, id, version), collapse = "-") + + } + + renv_bootstrap_platform_os_via_redhat_release <- function() { + + # read /etc/redhat-release + contents <- readLines("/etc/redhat-release", warn = FALSE) + + # infer id + id <- if (grepl("centos", contents, ignore.case = TRUE)) + "centos" + else if (grepl("redhat", contents, ignore.case = TRUE)) + "redhat" + else + "unknown" + + # try to find a version component (very hacky) + version <- "unknown" + + parts <- strsplit(contents, "[[:space:]]")[[1L]] + for (part in parts) { + + nv <- tryCatch(numeric_version(part), error = identity) + if (inherits(nv, "error")) + next + + version <- nv[1, 1] + break + + } + + paste(c("linux", id, version), collapse = "-") + + } + + renv_bootstrap_library_root_name <- function(project) { + + # use project name as-is if requested + asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE") + if (asis) + return(basename(project)) + + # otherwise, disambiguate based on project's path + id <- substring(renv_bootstrap_hash_text(project), 1L, 8L) + paste(basename(project), id, sep = "-") + + } + + renv_bootstrap_library_root <- function(project) { + + prefix <- renv_bootstrap_profile_prefix() + + path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA) + if (!is.na(path)) + return(paste(c(path, prefix), collapse = "/")) + + path <- renv_bootstrap_library_root_impl(project) + if (!is.null(path)) { + name <- renv_bootstrap_library_root_name(project) + return(paste(c(path, prefix, name), collapse = "/")) + } + + renv_bootstrap_paths_renv("library", project = project) + + } + + renv_bootstrap_library_root_impl <- function(project) { + + root <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA) + if (!is.na(root)) + return(root) + + type <- renv_bootstrap_project_type(project) + if (identical(type, "package")) { + userdir <- renv_bootstrap_user_dir() + return(file.path(userdir, "library")) + } + + } + + renv_bootstrap_validate_version <- function(version, description = NULL) { + + # resolve description file + # + # avoid passing lib.loc to `packageDescription()` below, since R will + # use the loaded version of the package by default anyhow. note that + # this function should only be called after 'renv' is loaded + # https://github.com/rstudio/renv/issues/1625 + description <- description %||% packageDescription("renv") + + # check whether requested version 'version' matches loaded version of renv + sha <- attr(version, "sha", exact = TRUE) + valid <- if (!is.null(sha)) + renv_bootstrap_validate_version_dev(sha, description) + else + renv_bootstrap_validate_version_release(version, description) + + if (valid) + return(TRUE) + + # the loaded version of renv doesn't match the requested version; + # give the user instructions on how to proceed + remote <- if (!is.null(description[["RemoteSha"]])) { + paste("rstudio/renv", description[["RemoteSha"]], sep = "@") + } else { + paste("renv", description[["Version"]], sep = "@") + } + + # display both loaded version + sha if available + friendly <- renv_bootstrap_version_friendly( + version = description[["Version"]], + sha = description[["RemoteSha"]] + ) + + fmt <- paste( + "renv %1$s was loaded from project library, but this project is configured to use renv %2$s.", + "- Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.", + "- Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.", + sep = "\n" + ) + catf(fmt, friendly, renv_bootstrap_version_friendly(version), remote) + + FALSE + + } + + renv_bootstrap_validate_version_dev <- function(version, description) { + expected <- description[["RemoteSha"]] + is.character(expected) && startswith(expected, version) + } + + renv_bootstrap_validate_version_release <- function(version, description) { + expected <- description[["Version"]] + is.character(expected) && identical(expected, version) + } + + renv_bootstrap_hash_text <- function(text) { + + hashfile <- tempfile("renv-hash-") + on.exit(unlink(hashfile), add = TRUE) + + writeLines(text, con = hashfile) + tools::md5sum(hashfile) + + } + + renv_bootstrap_load <- function(project, libpath, version) { + + # try to load renv from the project library + if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) + return(FALSE) + + # warn if the version of renv loaded does not match + renv_bootstrap_validate_version(version) + + # execute renv load hooks, if any + hooks <- getHook("renv::autoload") + for (hook in hooks) + if (is.function(hook)) + tryCatch(hook(), error = warnify) + + # load the project + renv::load(project) + + TRUE + + } + + renv_bootstrap_profile_load <- function(project) { + + # if RENV_PROFILE is already set, just use that + profile <- Sys.getenv("RENV_PROFILE", unset = NA) + if (!is.na(profile) && nzchar(profile)) + return(profile) + + # check for a profile file (nothing to do if it doesn't exist) + path <- renv_bootstrap_paths_renv("profile", profile = FALSE, project = project) + if (!file.exists(path)) + return(NULL) + + # read the profile, and set it if it exists + contents <- readLines(path, warn = FALSE) + if (length(contents) == 0L) + return(NULL) + + # set RENV_PROFILE + profile <- contents[[1L]] + if (!profile %in% c("", "default")) + Sys.setenv(RENV_PROFILE = profile) + + profile + + } + + renv_bootstrap_profile_prefix <- function() { + profile <- renv_bootstrap_profile_get() + if (!is.null(profile)) + return(file.path("profiles", profile, "renv")) + } + + renv_bootstrap_profile_get <- function() { + profile <- Sys.getenv("RENV_PROFILE", unset = "") + renv_bootstrap_profile_normalize(profile) + } + + renv_bootstrap_profile_set <- function(profile) { + profile <- renv_bootstrap_profile_normalize(profile) + if (is.null(profile)) + Sys.unsetenv("RENV_PROFILE") + else + Sys.setenv(RENV_PROFILE = profile) + } + + renv_bootstrap_profile_normalize <- function(profile) { + + if (is.null(profile) || profile %in% c("", "default")) + return(NULL) + + profile + + } + + renv_bootstrap_path_absolute <- function(path) { + + substr(path, 1L, 1L) %in% c("~", "/", "\\") || ( + substr(path, 1L, 1L) %in% c(letters, LETTERS) && + substr(path, 2L, 3L) %in% c(":/", ":\\") + ) + + } + + renv_bootstrap_paths_renv <- function(..., profile = TRUE, project = NULL) { + renv <- Sys.getenv("RENV_PATHS_RENV", unset = "renv") + root <- if (renv_bootstrap_path_absolute(renv)) NULL else project + prefix <- if (profile) renv_bootstrap_profile_prefix() + components <- c(root, renv, prefix, ...) + paste(components, collapse = "/") + } + + renv_bootstrap_project_type <- function(path) { + + descpath <- file.path(path, "DESCRIPTION") + if (!file.exists(descpath)) + return("unknown") + + desc <- tryCatch( + read.dcf(descpath, all = TRUE), + error = identity + ) + + if (inherits(desc, "error")) + return("unknown") + + type <- desc$Type + if (!is.null(type)) + return(tolower(type)) + + package <- desc$Package + if (!is.null(package)) + return("package") + + "unknown" + + } + + renv_bootstrap_user_dir <- function() { + dir <- renv_bootstrap_user_dir_impl() + path.expand(chartr("\\", "/", dir)) + } + + renv_bootstrap_user_dir_impl <- function() { + + # use local override if set + override <- getOption("renv.userdir.override") + if (!is.null(override)) + return(override) + + # use R_user_dir if available + tools <- asNamespace("tools") + if (is.function(tools$R_user_dir)) + return(tools$R_user_dir("renv", "cache")) + + # try using our own backfill for older versions of R + envvars <- c("R_USER_CACHE_DIR", "XDG_CACHE_HOME") + for (envvar in envvars) { + root <- Sys.getenv(envvar, unset = NA) + if (!is.na(root)) + return(file.path(root, "R/renv")) + } + + # use platform-specific default fallbacks + if (Sys.info()[["sysname"]] == "Windows") + file.path(Sys.getenv("LOCALAPPDATA"), "R/cache/R/renv") + else if (Sys.info()[["sysname"]] == "Darwin") + "~/Library/Caches/org.R-project.R/R/renv" + else + "~/.cache/R/renv" + + } + + renv_bootstrap_version_friendly <- function(version, shafmt = NULL, sha = NULL) { + sha <- sha %||% attr(version, "sha", exact = TRUE) + parts <- c(version, sprintf(shafmt %||% " [sha: %s]", substring(sha, 1L, 7L))) + paste(parts, collapse = "") + } + + renv_bootstrap_exec <- function(project, libpath, version) { + if (!renv_bootstrap_load(project, libpath, version)) + renv_bootstrap_run(version, libpath) + } + + renv_bootstrap_run <- function(version, libpath) { + + # perform bootstrap + bootstrap(version, libpath) + + # exit early if we're just testing bootstrap + if (!is.na(Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA))) + return(TRUE) + + # try again to load + if (requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) { + return(renv::load(project = getwd())) + } + + # failed to download or load renv; warn the user + msg <- c( + "Failed to find an renv installation: the project will not be loaded.", + "Use `renv::activate()` to re-initialize the project." + ) + + warning(paste(msg, collapse = "\n"), call. = FALSE) + + } + + renv_json_read <- function(file = NULL, text = NULL) { + + jlerr <- NULL + + # if jsonlite is loaded, use that instead + if ("jsonlite" %in% loadedNamespaces()) { + + json <- catch(renv_json_read_jsonlite(file, text)) + if (!inherits(json, "error")) + return(json) + + jlerr <- json + + } + + # otherwise, fall back to the default JSON reader + json <- catch(renv_json_read_default(file, text)) + if (!inherits(json, "error")) + return(json) + + # report an error + if (!is.null(jlerr)) + stop(jlerr) + else + stop(json) + + } + + renv_json_read_jsonlite <- function(file = NULL, text = NULL) { + text <- paste(text %||% read(file), collapse = "\n") + jsonlite::fromJSON(txt = text, simplifyVector = FALSE) + } + + renv_json_read_default <- function(file = NULL, text = NULL) { + + # find strings in the JSON + text <- paste(text %||% read(file), collapse = "\n") + pattern <- '["](?:(?:\\\\.)|(?:[^"\\\\]))*?["]' + locs <- gregexpr(pattern, text, perl = TRUE)[[1]] + + # if any are found, replace them with placeholders + replaced <- text + strings <- character() + replacements <- character() + + if (!identical(c(locs), -1L)) { + + # get the string values + starts <- locs + ends <- locs + attr(locs, "match.length") - 1L + strings <- substring(text, starts, ends) + + # only keep those requiring escaping + strings <- grep("[[\\]{}:]", strings, perl = TRUE, value = TRUE) + + # compute replacements + replacements <- sprintf('"\032%i\032"', seq_along(strings)) + + # replace the strings + mapply(function(string, replacement) { + replaced <<- sub(string, replacement, replaced, fixed = TRUE) + }, strings, replacements) + + } + + # transform the JSON into something the R parser understands + transformed <- replaced + transformed <- gsub("{}", "`names<-`(list(), character())", transformed, fixed = TRUE) + transformed <- gsub("[[{]", "list(", transformed, perl = TRUE) + transformed <- gsub("[]}]", ")", transformed, perl = TRUE) + transformed <- gsub(":", "=", transformed, fixed = TRUE) + text <- paste(transformed, collapse = "\n") + + # parse it + json <- parse(text = text, keep.source = FALSE, srcfile = NULL)[[1L]] + + # construct map between source strings, replaced strings + map <- as.character(parse(text = strings)) + names(map) <- as.character(parse(text = replacements)) + + # convert to list + map <- as.list(map) + + # remap strings in object + remapped <- renv_json_remap(json, map) + + # evaluate + eval(remapped, envir = baseenv()) + + } + + renv_json_remap <- function(json, map) { + + # fix names + if (!is.null(names(json))) { + lhs <- match(names(json), names(map), nomatch = 0L) + rhs <- match(names(map), names(json), nomatch = 0L) + names(json)[rhs] <- map[lhs] + } + + # fix values + if (is.character(json)) + return(map[[json]] %||% json) + + # handle true, false, null + if (is.name(json)) { + text <- as.character(json) + if (text == "true") + return(TRUE) + else if (text == "false") + return(FALSE) + else if (text == "null") + return(NULL) + } + + # recurse + if (is.recursive(json)) { + for (i in seq_along(json)) { + json[i] <- list(renv_json_remap(json[[i]], map)) + } + } + + json + + } + + # load the renv profile, if any + renv_bootstrap_profile_load(project) + + # construct path to library root + root <- renv_bootstrap_library_root(project) + + # construct library prefix for platform + prefix <- renv_bootstrap_platform_prefix() + + # construct full libpath + libpath <- file.path(root, prefix) + + # run bootstrap code + renv_bootstrap_exec(project, libpath, version) + + invisible() + +}) diff --git a/renv/settings.json b/renv/settings.json new file mode 100644 index 0000000..ffdbb32 --- /dev/null +++ b/renv/settings.json @@ -0,0 +1,19 @@ +{ + "bioconductor.version": null, + "external.libraries": [], + "ignored.packages": [], + "package.dependency.fields": [ + "Imports", + "Depends", + "LinkingTo" + ], + "ppm.enabled": null, + "ppm.ignored.urls": [], + "r.version": null, + "snapshot.type": "implicit", + "use.cache": true, + "vcs.ignore.cellar": true, + "vcs.ignore.library": true, + "vcs.ignore.local": true, + "vcs.manage.ignores": true +} diff --git a/scripts/QC_nextflow_output.R b/scripts/QC_nextflow_output.R new file mode 100644 index 0000000..688cfae --- /dev/null +++ b/scripts/QC_nextflow_output.R @@ -0,0 +1,408 @@ +#!/usr/bin/env Rscript +# A script to clean NextFlow outputs for final processing +# The same as "NextFlow_Output_QC_Postprocess_1.R" but to be run in a script +# Developed by Dr. Jake Beierle (don't forget the Dr., it's important) + +# ----Documentation---- +# See comprehensive documentation on the github repository +# https://github.com/jacobbeierle/JABS_nextflow_postprocess/tree/main + +# ====================== +# Set up / Libraries / Options +# ====================== + +suppressPackageStartupMessages({ + library(optparse) + library(yaml) + library(tidyverse) + library(writexl) +}) + +# ======================= +# Argument Parser +# ====================== + +option_list <- list( + make_option("--input_dir", type = "character", help = "Input directory (required)"), + make_option("--output_dir", type = "character", help = "Output directory (required)"), + make_option("--param", type = "character", default = NULL, + help = "Optional YAML config file to override defaults"), + + # QC parameters with defaults + make_option("--expected_length", type = "integer", default = 60*60*30 + 5*30, + help = "Expected video length in seconds [default %default]"), + make_option("--max_tracklet_per_hour", type = "integer", default = 6, + help = "Maximum tracklets per hour [default %default]"), + make_option("--max_missing_pose", type = "double", default = 0.005, + help = "Maximum fraction of missing pose [default %default]"), + make_option("--max_missing_segmentation", type = "double", default = 0.2, + help = "Maximum fraction of missing segmentation [default %default]"), + make_option("--max_missing_keypoint", type = "double", default = 0.01, + help = "Maximum fraction of missing keypoints [default %default]"), + make_option("--fecal_boli_quantile_plotting", type = "double", default = 0.05, + help = "Quantile for fecal boli plotting [default %default]") +) + +opt_parser <- OptionParser(option_list = option_list, + description = "QC Reporting Pipeline") + +# Parse command-line arguments +args <- parse_args(opt_parser) + +# ======================= +# Merge parameter priorites +# ====================== + +# Set input, output directories +input.dir <- args$input_dir +output.dir <- args$output_dir + +# Set defaults +params <- list( + expected_length = args$expected_length, + max_tracklet_per_hour = args$max_tracklet_per_hour, + max_missing_pose = args$max_missing_pose, + max_missing_segmentation = args$max_missing_segmentation, + max_missing_keypoint = args$max_missing_keypoint, + fecal_boli_quantile_plotting = args$fecal_boli_quantile_plotting +) + +# Override with YAML (if provided) +if (!is.null(args$param)) { + yaml_vals <- yaml::read_yaml(args$param) + params <- modifyList(params, yaml_vals) +} + +# Print final configuration +cat("=== QC CONFIGURATION ===\n") +cat("Input directory: ", input.dir, "\n") +cat("Output directory:", output.dir, "\n") +cat("QC Parameters:\n") +print(params) + +# ================== +# Create output directories +# ================== + +for (subdirectory in c("final_nextflow_feature_data", + "qc/nextflow_qc_logs", + "qc/missing_or_dup_data", + "qc/qc_figs")) { + dir.path = file.path(output.dir, subdirectory) + dir.create(dir.path, recursive = T, showWarnings = F) +} + +# ================ +# Process and Publish QC logs with success or failure annotated in a CSV +# ================ +# Read QC files in NextFlow_Output directory +qc_log <- list.files( + path = "~/kumar-group/SING-grant/NextflowOutput/", + pattern = "qc_batch_", + full.names = TRUE, + recursive = TRUE + ) %>% + read_csv(id = "QC_file", show_col_types = FALSE) + +# Record why QC failed for each video +qc_log <- qc_log %>% + mutate(passed_duration_QC = video_duration == params$expected_length, + passed_tracklet_QC = pose_tracklets < params$max_tracklet_per_hour * params$expected_length / 108000, + passed_segmentation_QC = seg_counts > (1 - params$max_missing_segmentation) * params$expected_length, + passed_pose_QC = pose_counts > (1 - params$max_missing_pose) * params$expected_length, + passed_kp_QC = missing_keypoint_frames < params$max_missing_keypoint * params$expected_length) + +# Apply thresholds defined above to create a separate 'failed QC' data frame +qc_log.failed <- qc_log %>% + filter(if_any(starts_with("passed_"), ~ !.x)) + +# Write final Nextflow QC files for review by a human +write.csv(qc_log, file.path(output.dir, "qc/nextflow_qc_logs/qc_all.csv"), row.names = FALSE) +write.csv(qc_log.failed, file.path(output.dir, "qc/nextflow_qc_logs/qc_failed.csv"), row.names = FALSE) + +# List of expected videos from the QC log files +expected_videos <- qc_log$video_name |> + gsub("_with_fecal_boli", "", x = _) |> + gsub("_filtered", "", x = _) |> + sub("^/", "", x = _) |> + unique() # Remove duplicate + +# ================== +# Helper functions for processing output data +# ================== +read_raw_data <- function(input_dir, pattern) { + ########################################################################## + # Read in multiple CSV files from a directory and harmonize the ID column + # + # Args: + # input_dir: path to the folder containing CSV files + # pattern: regex pattern to match file names + # + # Returns: + # A tibble with: + # - NetworkFilename as the first column + # - Cleaned NetworkFilename (no "_corrected", "_filtered", ".avi", leading "." or "/") + # + # Notes: + # - If NetworkFilename does not exist, the first column is used as ID + # - Useful for harmonizing output from different workflows before merging + ########################################################################## + + # Read in data from multiple csv files of the same patterns + raw_data <- list.files(path = input_dir, pattern = pattern, + recursive = T, full.names = T) |> + read_csv(show_col_types = FALSE) + + # Use NetworkFilename as the ID column (move it to the first column if not already so) + if ("NetworkFilename" %in% names(raw_data)) { + raw_data <- relocate(raw_data, NetworkFilename, .before = 1) + } else { + colnames(raw_data)[1] <- "NetworkFilename" + } + + # Clean and harmonize the NetworkFilename across data + raw_data$NetworkFilename <- raw_data$NetworkFilename |> + gsub("_corrected", "", x = _) |> + gsub("_filtered", "", x = _) |> + gsub("\\.avi$", "", x = _) |> + sub("^\\.", "", x = _) |> + sub("^/", "", x = _) + + return(raw_data) +} + +check_missing_and_dup <- function(expected_videos, data_df, corr_thres = 0.99) { + ########################################################################## + # Check for missing videos and duplicated rows in a dataset + # + # Args: + # expected_videos: character vector of expected NetworkFilename values + # data_df: dataframe containing a NetworkFilename column and data + # corr_thres: a number for how much correlated 2 rows are to be flagged + # + # Returns: + # A list with three elements: + # - missing_qc: videos present in expected_videos but missing in output_file + # - missing_output: videos present in output_file but missing in expected_videos + # - dup_data: rows in output_file that are duplicated (ignoring NetworkFilename), + # after rounding numeric columns to zero digits + # + # Notes: + # - Useful for QC of experimental datasets, e.g., fecal boli or gait data + # - Rounds numeric columns before checking for duplicates to account for minor differences + ########################################################################## + + video_missing_output <- setdiff(expected_videos, data_df$NetworkFilename) + video_missing_in_qc <- setdiff(data_df$NetworkFilename, expected_videos) + + # Check for rows with identical data (i.e. something went wrong in video recording) + dup_idx <- which(duplicated(data_df[, -1]) | duplicated(data_df[, -1], fromLast = TRUE)) + + # Check for rows with high correlation + corr_mat <- data_df %>% + dplyr::select(where(is.numeric)) %>% + scale() %>% t() %>% + cor(use = "pairwise.complete.obs") + + # correlated row pairs above threshold + row_pairs <- which(corr_mat > corr_thres & row(corr_mat) != col(corr_mat), arr.ind = TRUE) + cor_idx <- unique(c(row_pairs[,1], row_pairs[,2])) + + # UNION: rows that are duplicated OR highly correlated + all_idx <- sort(unique(c(dup_idx, cor_idx))) + + duplicated_rows <- data_df[all_idx,] + + return(list(missing_qc = video_missing_in_qc, + missing_output = video_missing_output, + dup_data = duplicated_rows)) +} + +# ================== +# Process fecal boli data +# ================== +# Concatenate all instances +fecal_boli.raw <- read_raw_data(input_dir = "~/kumar-group/SING-grant/NextflowOutput/", + pattern = "fecal_boli.csv") + +# Check for missing and duplicated data +fecal_boli.summary <- check_missing_and_dup(expected_videos = expected_videos, + data_df = fecal_boli.raw) +# Write out all raw, merged fecal boli counts +write.csv(fecal_boli.raw, file.path(output.dir, "final_nextflow_feature_data/fecal_boli_raw.csv"), row.names = FALSE) + +# ================ +# Fecal boli QC plots +# ================ +# Pivot longer to facilitate plotting for QC +fecal_boli.plot <- fecal_boli.raw |> + pivot_longer( + cols = !c(NetworkFilename, nextflow_version), + names_to = "min", + values_to = "fecal_boli", + values_drop_na = TRUE) |> + mutate(min = parse_number(min)) + +# Plot fecal boli QC measures +outFileNamePDF <- file.path(output.dir, "qc/qc_figs/fecal_boli_qc_figs.pdf") +pdf(outFileNamePDF, 6, 6) + +# Growth curve for all mice +ggplot(fecal_boli.plot, aes(min, fecal_boli, group = NetworkFilename, colour = NetworkFilename))+ + geom_line() + + labs(title = "Fecal boli growth, all mice") + + theme(legend.position = "none") + +# Plot mice with lowest fecal boli +fecal_boli.plot |> + summarise(across(fecal_boli, max), .by = NetworkFilename) |> + slice_min(fecal_boli, prop = params$fecal_boli_quantile_plotting) |> + select(NetworkFilename) |> + merge(fecal_boli.plot, by.x = "NetworkFilename") |> + ggplot(aes(min, fecal_boli, group = NetworkFilename, colour = NetworkFilename))+ + geom_line() + + labs(title = paste("Lowest ", params$fecal_boli_quantile_plotting*100, "% of fecal boli mice", sep = "")) + + theme(legend.position = "none") + +# Plot mice with highest fecal boli +fecal_boli.plot |> + summarise(across(fecal_boli, max), .by = NetworkFilename) |> + slice_max(fecal_boli, prop = params$fecal_boli_quantile_plotting) |> + select(NetworkFilename) |> + merge(fecal_boli.plot, by.x = "NetworkFilename") |> + ggplot(aes(min, fecal_boli, group = NetworkFilename, colour = NetworkFilename))+ + geom_line() + + labs(title = paste("Highest ", params$fecal_boli_quantile_plotting*100, "% of fecal boli mice", sep = "")) + + theme(legend.position = "none") + +# Histogram of final fecal boli count +fecal_boli.plot |> + arrange(desc(min)) |> + distinct(NetworkFilename, .keep_all = TRUE) |> + ggplot(aes(fecal_boli))+ + geom_histogram(binwidth = 1, boundary = 0)+ + labs(title = "Fecal boli highest bin, all mice") + + ylab("count") + +invisible(dev.off()) + +# =============== +# Process Gait Data +# =============== +# Import Gait Data +gait.raw <- read_raw_data(input_dir = "~/kumar-group/SING-grant/NextflowOutput/", + pattern = "gait.csv") + +video_level_metrics = c("Distance Traveled", "Body Length", "Speed", "Speed Variance", "nextflow_version") + +gait.wide_format <- gait.raw %>% + # Remove variance measures from speed bins with fewer than 3 strides + mutate(across(.cols = contains("Variance") & !all_of(video_level_metrics), + .fns = ~ ifelse(`Stride Count` < 3, NA, .x))) %>% + # Convert to wide format + pivot_wider(id_cols = c(NetworkFilename, all_of(video_level_metrics)), + names_from = `Speed Bin`, + values_from = -c(NetworkFilename, all_of(video_level_metrics), `Speed Bin`), + names_sep = ".") %>% + mutate(across(.cols = c(`Stride Count.10`, `Stride Count.15`, `Stride Count.20`, `Stride Count.25`), + .fns = ~ replace_na(.x, 0))) + +# Check for missing and duplicated data with gait +gait.summary <- check_missing_and_dup(expected_videos = expected_videos, + data_df = gait.wide_format) + +#output to final CSV +write.csv(gait.wide_format, file.path(output.dir, "final_nextflow_feature_data/gait_final.csv"), row.names = FALSE) + +# ================= +# Process JABS Feature Data +# ================= +JABS.features <- read_raw_data(input_dir = "~/kumar-group/SING-grant/NextflowOutput/", + pattern = "features.csv") + +# Check for missing data in JABS.features +JABS.features.summary <- check_missing_and_dup(expected_videos = expected_videos, + data_df = JABS.features) + +# Write the final csv +write.csv(JABS.features, file.path(output.dir, "final_nextflow_feature_data/JABS_features_final.csv"), row.names = FALSE) + +# ==================== +# Process morphometrics feature data +# ==================== +morpho.raw <- read_raw_data(input_dir = "~/kumar-group/SING-grant/NextflowOutput/", + pattern = "morphometrics.csv") + +# Check for missing and duplicated data in morphometric outputs +morpho.summary <- check_missing_and_dup(expected_videos = expected_videos, + data_df = morpho.raw) + +# ================= +# Report and output data for all warnings +# ================= +# Videos in QC but not in output +all_missing_data <- list("fecal_boli" = fecal_boli.summary$missing_output, + "gait" = gait.summary$missing_output, + "JABS_features" = JABS.features.summary$missing_output, + "Morphometrics" = morpho.summary$missing_output) + +# Videos in output but not in QC +videos_not_in_qc_report <- list("fecal_boli" = fecal_boli.summary$missing_qc, + "gait" = gait.summary$missing_qc, + "JABS_features" = JABS.features.summary$missing_qc, + "morphometrics" = morpho.summary$missing_qc) + +# Output data that is duplicated in the data frames +all_duplicated_data <- list("fecal_boli" = fecal_boli.summary$dup_data, + "gait" = gait.summary$dup_data, + "JABS_features" = JABS.features.summary$dup_data, + "morphometrics" = morpho.summary$dup_data) + +no_missing_output <- all(sapply(all_missing_data, length) == 0) +no_missing_qc <- all(sapply(videos_not_in_qc_report, length) == 0) +no_dups <- all(sapply(all_duplicated_data, nrow) == 0) + +# Summarize and write warnings +cat("=== FINAL ERROR REPORT ===\n") +if (no_missing_output && no_missing_qc && no_dups) { + cat("No errors to report\n") + # Create placeholder files + write.csv("No data missing", file.path(output.dir, "qc/missing_or_dup_data/missing_data.csv"), row.names = FALSE) + write.csv("No data missing", file.path(output.dir, "qc/missing_or_dup_data/videos_not_in_qc_report.csv"), row.names = FALSE) + write_xlsx(as.data.frame("No duplicated data"), file.path(output.dir, "qc/missing_or_dup_data/duplicated_data.xlsx"), row.names = FALSE) +} else { + + # Check for missing output data + if (no_missing_output) { + write.csv("No data missing", file.path(output.dir, "qc/missing_or_dup_data/missing_data.csv"), row.names = FALSE) + } else { + all_missing_data <- all_missing_data %>% + enframe(., name = "outputType", value = "video_path") %>% + unnest(cols = video_path) %>% + mutate(missing = TRUE) %>% + pivot_wider(id_cols = video_path, names_from = outputType, values_from = missing) + write.csv(all_missing_data, file.path(output.dir, "qc/missing_or_dup_data/missing_data.csv"), row.names = FALSE) + cat(paste("Missing", colnames(all_missing_data)[-1], "data"), sep = "\n") + } + + # Check for missing QC data + if (no_missing_qc) { + write.csv("No data missing", file.path(output.dir, "qc/missing_or_dup_data/videos_not_in_qc_report.csv"), row.names = FALSE) + } else { + videos_not_in_qc_report <- videos_not_in_qc_report %>% + enframe(., name = "outputType", value = "video_path") %>% + unnest(cols = video_path) %>% + mutate(missing = TRUE) %>% + pivot_wider(id_cols = video_path, names_from = outputType, values_from = missing) + write.csv(videos_not_in_qc_report, file.path(output.dir, "qc/missing_or_dup_data/videos_not_in_qc_report.csv"), row.names = FALSE) + cat(paste("Missing video in QC for", colnames(videos_not_in_qc_report)[-1], "data"), sep = "\n") + } + + # Check for duplicated data + if (no_dups) { + write_xlsx(as.data.frame("No duplicated data"), file.path(output.dir, "qc/missing_or_dup_data/duplicated_data.xlsx"), row.names = FALSE) + } else { + write_xlsx(all_duplicated_data, path = file.path(output.dir, "qc/missing_or_dup_data/duplicated_data.xlsx")) + cat(paste("Duplicated data for", names(all_duplicated_data)[sapply(all_duplicated_data, nrow) != 0]), sep = "\n") + } +}