1.1.3 (#17)

TCLamnidis · web-flow · commit 11d1434bf747 · 2023-03-17T16:36:17.000+01:00
* prep changelog

* Fix PR template

* WIP parallelise eager job submission

* Correct syntax error

* No printing to screen for arrays. fix whitespace

* fix number of jobs

* make executable

* Add array qsub command

* update .gitignore

* print qsub command before submission

* Fix job naming

* Initial commit of poseidon package creation

* Rscript to fill in janno and overwrite columns

* Bugfixes

* Add suffix option and correct output janno path

* move script

* Add janno recreation. Other minor changes

* Add pandora results to janno

* Add log info. New package creation completed.

* Minor changes. Add Library_Names column

* Add script to mirror Population and Sex from janno to fam/ind

* Update CHANGELOG.md

* Update package updating.

* Add debug option. Add AE version in poseidon pkgs

* Remove debug cause of clash. Error when update fails.

* Update CHANGELOG.md

* Only delete temp files if validation passed.

* Bugfix. Runs are now updated only if a change in the data occurs.

* move update script to scripts/

* Server-side testing paths

* Add path to trident executable

* server-paths

* Bump version

* Add environment yml file

* Update CHANGELOG.md

* Update output folder to live

* increase resources for AE_spawner jobs

* More resource tweaking for array jobs

* Increase memory further

* Remove path from environment yml

* Bump version

* Match Run_ID, not Batch_ID

* Array log subdir

* Update CHANGELOG.md

* prep CHANGELOG.md

* 40G memory max for array job

* indentation fix

* correct column naming

* document changes

* correct Nr_libs in column selection

* correct paths

* Update .gitignore

* Optimisation. Version bump. Distinct iids used for joining.

* Bump version

* Update CHANGELOG.md

* bump version

* Add mention of memory changes
diff --git a/.gitignore b/.gitignore
@@ -8,4 +8,10 @@ eager_outputs/
 .Rproj.user
 .nfs*
 dev/
-test_data/
+test_data/
+*.*.results.txt
+*Autorun_eager_queue.txt
+.tmp/
+eager_inputs_old/
+eager_outputs_old/
+array_Logs/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,19 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.1.3] - 17/03/2023
+
+### `Added`
+
+### `Fixed`
+ - Column naming in `fill_in_janno.R`. `Nr_Libs` -> `Nr_Libraries`.
+ - `prepare_eager_tsv.R` no longer joins with non-unique iids. Optimised performance and less likely to kill the TSV maker.
+ - Increased memory given to eager spawner array jobs.
+
+### `Dependencies`
+
+### `Deprecated`
+
 ## [1.1.2] - 02/01/2023
 
 ### `Added`
diff --git a/scripts/fill_in_janno.R b/scripts/fill_in_janno.R
@@ -111,8 +111,8 @@ poseidon_tsv_cols <- tsv_dat %>% dplyr::select(Sample_Name, Library_ID, Stranded
       unique(UDG_Treatment) %>% length(.) > 1 ~ 'mixed',
       TRUE ~ unique(UDG_Treatment)
     ),
-    Nr_Libs=dplyr::n(),
-    Capture_Type=paste0(rep("1240K", Nr_Libs), collapse=";"),
+    Nr_Libraries=dplyr::n(),
+    Capture_Type=paste0(rep("1240K", Nr_Libraries), collapse=";"),
     Library_Built=dplyr::case_when(
       Strandedness == 'single' ~ 'ss',
       Strandedness == 'double' ~ 'ds',
@@ -188,7 +188,7 @@ updated_columns <- eager2poseidon::compile_eager_result_tables(
     "Contamination_Meas",
     "Damage",
     "UDG",
-    "Nr_Libs",
+    "Nr_Libraries",
     "Library_Names", ## Column including all the Library_IDs merged into these genotypes
     "Library_Built",
     "Capture_Type"
diff --git a/scripts/prepare_eager_tsv.R b/scripts/prepare_eager_tsv.R
@@ -43,10 +43,10 @@ save_ind_tsv <- function(data, rename, output_dir, ...) {
   if (!dir.exists(ind_dir)) {write(paste0("[prepare_eager_tsv.R]: Creating output directory '",ind_dir,"'"), stdout())}
   
   dir.create(ind_dir, showWarnings = F, recursive = T) ## Create output directory and subdirs if they do not exist.
- data %>% select(-individual.Full_Individual_Id) %>%  readr::write_tsv(file=paste0(ind_dir,"/",ind_id,".tsv")) ## Output structure can be changed here.
+  data %>% select(-individual.Full_Individual_Id) %>%  readr::write_tsv(file=paste0(ind_dir,"/",ind_id,".tsv")) ## Output structure can be changed here.
 
   ## Print Autorun_eager version to file
-  AE_version <- "1.1.2"
+  AE_version <- "1.1.3"
   cat(AE_version, file=paste0(ind_dir,"/autorun_eager_version.txt"), fill=T, append = F)
 }
 
@@ -124,13 +124,13 @@ complete_pandora_table <- join_pandora_tables(
   convert_all_ids_to_values(., con = con) %>%
   filter(sample.Ethically_culturally_sensitive == FALSE) ## Exclude ethically/culturally sensitive data. Conservative since it excludes NAs
 
-tibble_input_iids <- complete_pandora_table %>% filter(sequencing.Run_Id == sequencing_batch_id) %>% select(individual.Full_Individual_Id)
+tibble_input_iids <- complete_pandora_table %>% filter(sequencing.Run_Id == sequencing_batch_id) %>% select(individual.Full_Individual_Id) %>% distinct()
 
 ## Pull information from pandora, keeping only matching IIDs and requested Sequencing types.
 results <- inner_join(complete_pandora_table, tibble_input_iids, by=c("individual.Full_Individual_Id"="individual.Full_Individual_Id")) %>%
   filter(grepl(paste0("\\.", analysis_type), sequencing.Full_Sequencing_Id), analysis.Analysis_Id == autorun_name_from_analysis_type(analysis_type)) %>%
   select(individual.Full_Individual_Id,individual.Organism,library.Full_Library_Id,library.Protocol,analysis.Result_Directory,sequencing.Sequencing_Id,sequencing.Full_Sequencing_Id,sequencing.Single_Stranded) %>%
-  distinct() %>% ## TODO comment: would be worrying if not already unique, maybe consider throwing a warn?
+  distinct() %>% ## Need distinct() call because of how analysis tab is read in, which created one copy of each row per analysis field.
   group_by(individual.Full_Individual_Id) %>%
   filter(!is.na(analysis.Result_Directory)) %>% ## Exclude individuals with no results directory (seem to mostly be controls)
   mutate(
diff --git a/scripts/update_poseidon_package.sh b/scripts/update_poseidon_package.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-VERSION="1.1.2"
+VERSION="1.1.3"
 
 ## Colours for printing to terminal
 Yellow=$(tput sgr0)'\033[1;33m' ## Yellow normal face