Skip to content

Commit dfeba33

Browse files
authored
Merge pull request #91 from pinellolab/v2.1.9
v2.1.9
2 parents fa26384 + f8c9cc1 commit dfeba33

File tree

4 files changed

+14
-34
lines changed

4 files changed

+14
-34
lines changed

PostProcess/adjust_cols.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@
3737

3838
# read file to adjust in chunks
3939
chunksize_ = 100000
40-
chunks = pd.read_csv(inFile, sep="\t", chunksize=chunksize_)
40+
chunks = pd.read_csv(inFile, sep="\t", chunksize=chunksize_, low_memory=False)
4141

4242
# write header the first time a chunk is processed
4343
header = True

PostProcess/submit_job_automated_new_multiple_vcfs.sh

Lines changed: 10 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -354,21 +354,11 @@ while read vcf_f; do
354354
pid_search_ref=$!
355355
pids+=("$pid_search_ref") # add reference search pid
356356
names+=("Reference") # add pid identifier
357-
if [ -s $logerror ]; then
358-
printf "ERROR: off-targets search on reference genome failed\n" >&2
359-
rm -f $output_folder/*.targets.txt $output_folder/*profile* # delete results folder
360-
exit 1
361-
fi
362357
else # consider dna/rna bulges (not combined)
363358
crispritz.py search "$current_working_directory/Genomes/${ref_name}/" "$pam_file" "$guide_file" "${ref_name}_${pam_name}_${guide_name}_${mm}_${bDNA}_${bRNA}" -mm $mm -r -th $ceiling_result &
364359
pid_search_ref=$!
365360
pids+=("$pid_search_ref") # add reference search pid
366361
names+=("Reference") # add pid identifier
367-
if [ -s $logerror ]; then
368-
printf "ERROR: off-targets search (no bulges) on reference genome failed\n" >&2
369-
rm -f $output_folder/*.targets.txt $output_folder/*profile* # delete results folder
370-
exit 1
371-
fi
372362
fi
373363
echo -e 'Search Reference completed'
374364
else
@@ -384,22 +374,11 @@ while read vcf_f; do
384374
pid_search_var=$!
385375
pids+=("$pid_search_var") # add variants search pid
386376
names+=("Variant") # add pid identifier
387-
if [ -s $logerror ]; then
388-
printf "ERROR: off-targets search on alternative genome failed on variants in %s\n" "$vcf_name" >&2
389-
rm -r $output_folder/*.targets.txt $output_folder/*profile* # delete results folder
390-
exit 1
391-
fi
392-
echo -e 'Search Variant\tEnd\t'$(date) >>$log
393377
else # consider bulges
394378
crispritz.py search "$current_working_directory/Genomes/${ref_name}+${vcf_name}/" "$pam_file" "$guide_file" "${ref_name}+${vcf_name}_${pam_name}_${guide_name}_${mm}_${bDNA}_${bRNA}" -mm $mm -r -th $ceiling_result &
395379
pid_search_var=$!
396380
pids+=("$pid_search_var") # add variants search pid
397381
names+=("Variant") # add pid identifier
398-
if [ -s $logerror ]; then
399-
printf "ERROR: off-targets search (no bulges) on alternative genome failed on variants in %s\n" "$vcf_name" >&2
400-
rm -r $output_folder/*.targets.txt $output_folder/*profile* # delete results folder
401-
exit 1
402-
fi
403382
fi
404383
else
405384
echo -e "Search for variant already done"
@@ -411,17 +390,13 @@ while read vcf_f; do
411390
cd $starting_dir
412391
# TODO: REMOVE POOL SCRIPT FROM PROCESSING
413392
./pool_search_indels.py "$ref_folder" "$vcf_folder" "$vcf_name" "$guide_file" "$pam_file" $bMax $mm $bDNA $bRNA "$output_folder" $true_pam "$current_working_directory/" "$ncpus"
414-
if [ -s $logerror ]; then
415-
printf "ERROR: off-targets search on indels failed on variants in %s\n" "$vcf_name" >&2
416-
rm -r $output_folder/*.targets.txt $output_folder/*profile* # delete results folder
417-
exit 1
418-
fi
419393
awk '($3 !~ "n") {print $0}' "$output_folder/indels_${ref_name}+${vcf_name}_${pam_name}_${guide_name}_${mm}_${bDNA}_${bRNA}.targets.txt" >"$output_folder/indels_${ref_name}+${vcf_name}_${pam_name}_${guide_name}_${mm}_${bDNA}_${bRNA}.targets.txt.tmp"
420394
mv "$output_folder/indels_${ref_name}+${vcf_name}_${pam_name}_${guide_name}_${mm}_${bDNA}_${bRNA}.targets.txt.tmp" "$output_folder/indels_${ref_name}+${vcf_name}_${pam_name}_${guide_name}_${mm}_${bDNA}_${bRNA}.targets.txt"
421-
echo -e "Search INDELs completed"
422395
else
423396
echo -e "Search INDELs already done"
424397
fi
398+
echo -e "Search INDELs completed"
399+
425400
fi
426401

427402
# wait for jobs completion
@@ -430,16 +405,21 @@ while read vcf_f; do
430405
name="${names[$i]}"
431406

432407
if wait "$pid"; then
433-
echo -e "Search $name \End\t"$(date) >>$log # off-targets search on reference/variant genome
408+
if [ -s $logerror ]; then
409+
echo "ERROR: off-targets search ${name} failed\n" >&2
410+
rm -f $output_folder/*.targets.txt $output_folder/*profile* # delete results folder
411+
exit 1
412+
fi
413+
echo -e "Off-targets search $name\tEnd\t"$(date) >>$log # off-targets search on reference/variant genome
434414
else
435-
echo "ERROR: search $name failed" >&2
415+
echo "ERROR: Off-targets search $name failed" >&2
436416
exit 1
437417
fi
438418
done
439419
echo -e 'Off-targets search\tEnd\t'$(date) >>$log
440420
# move all targets into targets directory
441421
if [ -d "${output_folder}/crispritz_targets" ]; then
442-
mv $output_folder/*.targets.txt $output_folder/crispritz_targets
422+
mv $output_folder/*.targets.txt $output_folder/crispritz_targets &>/dev/null
443423
fi
444424
# move profiles into profile folder
445425
if ! [ -d "$output_folder/crispritz_prof" ]; then

PostProcess/utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -101,8 +101,8 @@
101101
"hgdp_wgs.20190516.full.chrY.vcf.gz": "54b3aba28600c8d0d8a695c8dcfdc4cd",
102102
}
103103
MD5ANNOTATION = {
104-
"dhs+encode+gencode.hg38.bed.tar.gz": "4f5eb631af903d4091bb2f57558c7b46",
105-
"gencode.protein_coding.bed.tar.gz": "04297ade436db70784733a5b13d42723",
104+
"dhs+encode+gencode.hg38.bed.tar.gz": "d3325e347c731b7c24c579a91b447b1b",
105+
"gencode.protein_coding.bed.tar.gz": "c6747bf2610ff144daafc8b02cef251d",
106106
}
107107
MD5SAMPLES = {
108108
"samplesIDs.1000G.txt": "720af666c9a938de74a2808033aa4509",

crisprme.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
import re
1111

1212

13-
version = "2.1.8" # CRISPRme version; TODO: update when required
13+
version = "2.1.9" # CRISPRme version; TODO: update when required
1414
__version__ = version
1515

1616
script_path = os.path.dirname(os.path.abspath(__file__))

0 commit comments

Comments
 (0)