|
| 1 | +#!/usr/bin/env bash |
| 2 | + |
| 3 | +## This script removes the results for an individual while maintaining the nextflow process cache for them. |
| 4 | +## It is intended as a way to refresh the results directories of an individual. This can be useful either |
| 5 | +## to remove older files after additional libraries appear and are therefore merged, or to remove results |
| 6 | +## with misleading names in cases where Pandora entries get updated (e.g. protocol mixup leading to changes |
| 7 | +## in strandedness for a library). |
| 8 | + |
## Helptext function
## Prints the usage line, a short description and the supported options to stdout.
## Takes no arguments; reads $0 for the script name shown in the usage line.
function Helptext() {
  ## printf with %s prints the script path literally (echo -e would interpret any backslashes in $0).
  printf '\t usage: %s [options] <ind_id_list>\n\n' "$0"
  printf '%s\n' "This script removes all output directory contents for the provided individuals, without clearing out caching, allowing for the results to be re-published."
  printf '%s\n\n' " This enables refreshing of result directories when changes to the input might have changed the merging of libraries, thus making the directory structure inconsistent."
  printf 'Options:\n'
  ## '--' stops printf from treating the leading '-' of the format string as an option.
  printf -- '-h, --help\t\tPrint this text and exit.\n'
  printf -- '-a, --analysis_type\t\tSet the analysis type. Options: TF, SG.\n'
}
| 17 | + |
## Print messages to stderr, optionally with colours.
## Usage: errecho [-y|-r] <message...>
##   -y  print the message in yellow
##   -r  print the message in red
## All remaining arguments are joined with spaces into one message. Backslash escapes
## in the message (e.g. a trailing "\n") are interpreted, and the terminal attributes
## are reset after the message. Nothing is written to stdout.
function errecho() {
  local Normal
  local Red
  local Yellow
  local colour

  ## tput fails (and prints its own error) when TERM is unset or unknown;
  ## in that case fall back to an empty reset sequence instead of polluting stderr.
  Normal=$(tput sgr0 2>/dev/null) || Normal=''
  Red="${Normal}"'\033[1;31m'    ## Reset, then bold red
  Yellow="${Normal}"'\033[1;33m' ## Reset, then bold yellow

  colour=''
  if [[ ${1-} == '-y' ]]; then
    colour="${Yellow}"
    shift 1
  elif [[ ${1-} == '-r' ]]; then
    colour="${Red}"
    shift 1
  fi

  ## The message is quoted so it is neither word-split nor glob-expanded.
  ## %b interprets backslash escapes the same way 'echo -e' does, which the
  ## colour codes above and callers passing "\n" rely on.
  printf '%b\n' "${colour}$*${Normal}" 1>&2
}
| 39 | + |
## Parse CLI args.

## Default parameters
ind_id_list_fn=''
analysis_type=''

## Read in CLI arguments.
## Parses the given command-line arguments into the globals 'analysis_type'
## (value of -a/--analysis_type) and 'ind_id_list_fn' (first positional argument).
## Exits 0 after printing the help text for -h/--help; exits 1 when getopt
## rejects the command line (unknown option or missing option argument).
function parse_cli_args() {
  local parsed

  ## getopt is run without -q and its exit status is checked, so a mistyped flag is
  ## reported and aborts the run instead of being silently dropped before anything is deleted.
  if ! parsed=$(getopt -o ha: --long analysis_type:,help -n 'clear_results.sh' -- "$@"); then
    Helptext
    exit 1
  fi
  ## getopt emits a shell-quoted argument list; eval is required to re-split it correctly.
  eval set -- "${parsed}"

  while true; do
    case "$1" in
      -h|--help) Helptext; exit 0 ;;
      -a|--analysis_type) analysis_type="${2}"; shift 2 ;;
      ## Everything after '--' is positional; only the first positional argument is used.
      --) ind_id_list_fn="${2-}"; break ;;
      *) echo -e "invalid option provided: $1.\n"; Helptext; exit 1 ;;
    esac
  done
}

parse_cli_args "$@"
| 57 | + |
## Validate inputs
## Every failed check prints the problem to stderr, shows the help text and aborts
## with exit status 1, so the deletion step is never reached with bad input.
if [[ -z ${ind_id_list_fn} ]]; then
  errecho "No individual ID list provided.\n"
  Helptext
  exit 1
fi

## The ID list must be something that can actually be read (and not a directory).
if [[ ! -r ${ind_id_list_fn} || -d ${ind_id_list_fn} ]]; then
  errecho "Individual ID list is not a readable file: ${ind_id_list_fn}\n"
  Helptext
  exit 1
fi

if [[ -z ${analysis_type} ]]; then
  errecho "No --analysis_type was provided.\n"
  Helptext
  exit 1
elif [[ ${analysis_type} != "SG" && ${analysis_type} != "TF" ]]; then
  errecho "analysis_type must be SG or TF. You provided: ${analysis_type}\n"
  Helptext
  exit 1
fi
| 72 | + |
root_eager_dir='/mnt/archgen/Autorun_eager/eager_outputs' ## Directory should include subdirectories for each analysis type (TF/SG) and sub-subdirectories for each site and individual.

## Read all individual IDs into an array.
## IDs are whitespace-separated (one or more per line). Reading with 'read -a' splits
## them without glob-expanding the file contents; the '||' clause picks up a final
## line that has no trailing newline.
input_iids=()
while read -r -a id_fields || (( ${#id_fields[@]} > 0 )); do
  input_iids+=("${id_fields[@]}")
done < "${ind_id_list_fn}"

## Remove all dirs except for 'work' and 'pipeline_info'.
## Both needed for caching.
## Also leave '1240k.imputed' and 'GTL_output' alone.
for ind_id in "${input_iids[@]}"; do
  ## An ID is used as a path component below; refuse anything that could escape the individual's directory.
  case "${ind_id}" in
    */*|.|..)
      errecho -r "Skipping invalid individual ID: ${ind_id}"
      continue
      ;;
  esac

  site_id=${ind_id:0:3} ## Site id is the first three characters of the individual ID
  ind_dir="${root_eager_dir}/${analysis_type}/${site_id}/${ind_id}"

  if [[ ! -d ${ind_dir} ]]; then
    errecho -y "No results directory found for ${ind_id}: ${ind_dir}"
    continue
  fi

  for dir in "${ind_dir}"/*; do
    ## An unmatched glob stays as the literal pattern; skip it (but keep dangling symlinks deletable).
    [[ -e ${dir} || -L ${dir} ]] || continue

    ## Compare the entry name itself (not the whole path) exactly against the keep-list.
    case "${dir##*/}" in
      work|pipeline_info|1240k.imputed|GTL_output) continue ;;
    esac

    errecho "Deleting results in: ${dir}"
    rm -r -- "${dir}" ## Delete the specific result directory and all its contents
  done
done
0 commit comments