-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerateSubmission.sh
More file actions
executable file
·109 lines (88 loc) · 2.58 KB
/
generateSubmission.sh
File metadata and controls
executable file
·109 lines (88 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/bin/bash
# Print the command-line synopsis on stdout, then terminate the script.
# Never returns to the caller: the exit status is always 2.
usage() {
  printf '%s\n' "Usage: $0 [-o --overwrite] [-d PUBLISH_DATE (yyyy-mm-dd)] [-g PUBLISH-GROUP] [-s SCV-FILE]"
  exit 2
}
# Report whether at least one of the given paths is a regular file.
# Arguments: zero or more paths to test.
# Returns:   0 as soon as one path is a regular file; 1 when none is
#            (including when no arguments are given).
exists() {
  local file
  for file in "$@"; do
    if [[ -f "$file" ]]; then
      # file found, return true
      return 0
    fi
  done
  # Exit statuses are limited to 0-255, and bash releases before 4.3 reject
  # "return -1" as an invalid option, so report failure with a plain 1.
  return 1
}
#########################
# Main script starts here
#
# Flow:
#   1. Parse options (-d and -g are required, -s is optional).
#   2. Extract <GROUP>-CGSEPIO-<DATE>.gzip into a scratch directory.
#   3. Concatenate the extracted cg-sepio-* files into one JSON document,
#      <GROUP>-ALL-<DATE>.json, wrapped in a "@context" object with a
#      "VariantPathogenicityInterpretation" array.
#   4. Pass that document to `lein run`, together with the names of the
#      submission CSV and run-report CSV (and the SCV file when given).
#   5. Remove the scratch directory and exit with the status of `lein run`.
unset PUBLISH_DATE PUBLISH_GROUP SCV_FILE
# NOTE(review): OVERWRITE is set by -o but nothing in this script reads it.
OVERWRITE=false

options=':od:g:s:h'
while getopts "$options" option; do
  case "$option" in
    o) OVERWRITE=true ;;
    d) PUBLISH_DATE=$OPTARG ;;
    g) PUBLISH_GROUP=$OPTARG ;;
    s) SCV_FILE=$OPTARG ;;
    h) usage; exit ;;
    \?) echo "Unknown option: -$OPTARG" >&2; exit 1 ;;
    :) echo "Missing option argument for -$OPTARG" >&2; exit 1 ;;
    # OPTARG is not meaningful here; the option letter itself is in $option.
    *) echo "Unimplemented option: -$option" >&2; exit 1 ;;
  esac
done
shift $((OPTIND - 1))

# make sure the publish_date and publish_group argument are passed
if [[ -z "$PUBLISH_DATE" || -z "$PUBLISH_GROUP" ]]; then
  echo "Both -d publish_date and -g publish_group are required." >&2
  exit 1
fi

# Upper-case the group name; quoted so the value is not word-split or globbed.
PUBLISH_GROUP=$(printf '%s\n' "$PUBLISH_GROUP" | awk '{print toupper($0)}')

tmpdir="tmpwork"
inputgzip="$PUBLISH_GROUP-CGSEPIO-$PUBLISH_DATE.gzip"
outputjson="$PUBLISH_GROUP-ALL-$PUBLISH_DATE.json"
submissioncsv="$PUBLISH_GROUP-SUBMISSION-$PUBLISH_DATE.csv"
runreportcsv="$PUBLISH_GROUP-RUN-REPORT-$PUBLISH_DATE.csv"

if [[ -n "$SCV_FILE" && ! -f "$SCV_FILE" ]]; then
  echo "Could not find the SCV file - $SCV_FILE" >&2
  exit 1
fi

# Fail before touching the scratch directory if there is nothing to extract.
if [[ ! -f "$inputgzip" ]]; then
  echo "Could not find the input archive - $inputgzip" >&2
  exit 1
fi

# make a fresh tmp work directory; the EXIT trap removes it on every exit
# path, including the error exits below
rm -fR -- "${tmpdir:?}"
mkdir -- "$tmpdir" || exit 1
cleanup_tmpdir() { rm -fR -- "${tmpdir:?}"; }
trap cleanup_tmpdir EXIT

# expand the cg-sepio-* json files
if ! tar -xvf "$inputgzip" --directory "$tmpdir"; then
  echo "Failed to extract $inputgzip" >&2
  exit 1
fi

# seed file with wrapper brace and VariantInterpretation array
combined="$tmpdir/$outputjson"
{
  echo '{"@context": "http://dataexchange.clinicalgenome.org/interpretation/json/context",'
  echo '"VariantPathogenicityInterpretation":['
} > "$combined" || exit 1

# Loop through the files extracted from the archive above. The separating
# comma is written *before* every element except the first, so there is no
# trailing comma to strip afterwards (the previous `sed -i ''` only works
# with BSD sed).
count=0
for filepath in "$tmpdir"/cg-sepio-*; do
  # An unmatched glob stays literal; skip it (and anything not a regular file).
  [[ -f "$filepath" ]] || continue
  printf '%s\n' "${filepath##*/}"
  if (( count > 0 )); then
    echo ',' >> "$combined"
  fi
  # Drop line 2 of each file - presumably its own @context entry, which the
  # wrapper already supplies. TODO confirm against the cg-sepio file format.
  sed -e '2d' "$filepath" >> "$combined" || exit 1
  count=$((count + 1))
done

if (( count == 0 )); then
  echo "No cg-sepio-* files found in $inputgzip" >&2
  exit 1
fi

# append enclosing bracket and brace to contain all VariantIntepretations.
echo ']}' >> "$combined"

# Build the argument list once; -c is only passed when an SCV file was given.
lein_args=(-f -o "$submissioncsv" -r "$runreportcsv")
if [[ -n "$SCV_FILE" ]]; then
  lein_args+=(-c "$SCV_FILE")
fi
lein run "${lein_args[@]}" "$combined"

# Propagate lein's status; the EXIT trap removes the temp directory.
exit $?