@@ -169,9 +169,8 @@ while read vcf_f; do
169169 dict_folder=" $current_working_directory /Dictionaries/dictionaries_${vcf_name} /"
170170 indel_dict_folder=" $current_working_directory /Dictionaries/log_indels_${vcf_name} /"
171171 indels_out=" $current_working_directory /Genomes/${ref_name} +${vcf_name} _INDELS"
172- indels_index_dir=" $current_working_directory /genome_library/${true_pam} _2_${ref_name} +${vcf_name} _INDELS"
173172
174- if ! [ -d " $enriched_folder " ]; then
173+ if ! [ -d " $enriched_folder " ]; then
175174 echo -e ' Add-variants\tStart\t' $( date) >> $log
176175 echo -e " Adding variants"
177176
@@ -181,70 +180,39 @@ while read vcf_f; do
181180 if [ -s $logerror ]; then
182181 printf " ERROR: Genome enrichment failed on %s\n" " $vcf_name " >&2
183182 # since failure happened, force genome enrichment to be repeated
184- rm -r " $enriched_folder " * " $variants_tmp " || true
183+ rm -r " $enriched_folder " * " $variants_tmp "
185184 exit 1
186185 fi
187186
188- # move enriched snp genome
189- mv " $variants_tmp /SNPs_genome/${ref_name} _enriched/" " $enriched_dir /"
187+ # create indels folder
188+ mkdir -p $indels_out
189+
190+ # move enriched snp genome and indels
191+ mv " $variants_tmp /SNPs_genome/${ref_name} _enriched/" " $enriched_folder /"
192+ mv $variants_tmp /fake* $indels_out
190193
191194 # create dictionaries folders if needed
192195 mkdir -p " $dict_folder " " $indel_dict_folder "
193196
194- # move annotation files
195- mv " $variants_tmp /SNPs_genome/" * .json " $dict_folder "
196- mv " $variants_tmp /SNPs_genome/log" * .txt " $indel_dict_folder "
197-
198- echo -e ' Add-variants\tEnd\t' $( date) >> " $log "
199-
200- # START STEP 1.1 - indels indexing
201- echo -e ' Indexing Indels\tStart\t' $( date) >> " $log "
202- " $starting_dir /pool_index_indels.py" \
203- " $variants_tmp /" \
204- " $pam_file " \
205- " $true_pam " \
206- " $ref_name " \
207- " $vcf_name " \
208- " $bMax " \
209- " $ncpus "
210-
211- if [ -s " $logerror " ]; then
212- printf " ERROR: indels indexing failed on %s\n" " $vcf_name " >&2
213- rm -r " $enriched_dir " * " $variants_tmp " || true
214- exit 1
215- fi
216- echo -e ' Indexing Indels\tEnd\t' $( date) >> " $log "
197+ # move variant annotation files
198+ mv $variants_tmp /SNPs_genome/* .json " $dict_folder "
199+ mv $variants_tmp /SNPs_genome/log* .txt " $indel_dict_folder "
217200
218- mkdir -p " $indels_out "
219- mv " $variants_tmp /fake" * " $indels_out "
201+ # remove temporary variant genome folder
220202 rm -r " $variants_tmp "
221- # END STEP 1.1 - indels indexing
203+
204+ echo -e ' Add-variants\tEnd\t' $( date) >> " $log "
222205 else
223206 echo -e " Variants already added"
224207 fi
225208
226- # START STEP 1.1 - indels indexing
227- # indels indexing if skipped in previous block
228- if ! [ -d " $indels_index_dir " ]; then
229- echo -e ' Indexing Indels\tStart\t' $( date) >> " $log "
230- " $starting_dir /pool_index_indels.py" \
231- " $variants_tmp /" \
232- " $pam_file " \
233- " $true_pam " \
234- " $ref_name " \
235- " $vcf_name " \
236- " $bMax " \
237- " $ncpus "
238-
239- if [ -s " $logerror " ]; then
240- printf " ERROR: indels indexing failed on %s\n" " $vcf_name " >&2
241- rm -rf " $enriched_dir " * " $variants_tmp " || true
242- exit 1
243- fi
244- echo -e ' Indexing Indels\tEnd\t' $( date) >> " $log "
245- fi
246- # END STEP 1.1 - indels indexing
209+ cd $current_working_directory
210+ fi
247211
212+ # check if anything odd happened during enrichment
213+ if [ -s $logerror ]; then
214+ printf " ERROR: Genome enrichment failed on %s\n" " $vcf_name " >&2
215+ exit 1
248216 fi
249217 # END STEP 1 - Genome enrichment
250218
@@ -266,7 +234,7 @@ while read vcf_f; do
266234 echo " Reference Index already present"
267235 echo -e ' Index-genome Reference\tEnd\t' $( date) >> " $log "
268236 idx_ref=" $idx_folder1 "
269- elif [ -d " $idx_folder2 " ]; then
237+ elif [ -d " $idx_folder2 " ]; then
270238 echo " Reference Index already present"
271239 echo -e ' Index-genome Reference\tEnd\t' $( date) >> " $log "
272240 idx_ref=" $idx_folder2 "
@@ -331,6 +299,38 @@ while read vcf_f; do
331299 echo -e ' Index-genome Variant\tEnd\t' $( date) >> " $log "
332300 idx_var=" $idx_folder1 "
333301 fi
302+
303+ # START STEP 2.3 - indels indexing
304+ indels_index_dir=" $current_working_directory /genome_library/${true_pam} _${bMax} _${ref_name} +${vcf_name} _INDELS"
305+ indels_out=" $current_working_directory /Genomes/${ref_name} +${vcf_name} _INDELS"
306+
307+ if ! [ -d " $indels_index_dir " ]; then
308+ echo -e ' Indexing Indels\tStart\t' $( date) >> " $log "
309+ " $starting_dir /pool_index_indels.py" \
310+ " $indels_out /" \
311+ " $pam_file " \
312+ " $true_pam " \
313+ " $ref_name " \
314+ " $vcf_name " \
315+ " $bMax " \
316+ " $ncpus "
317+
318+ if [ -s " $logerror " ]; then
319+ printf " ERROR: indels indexing failed on %s\n" " $vcf_name " >&2
320+ [ -d " $indels_index_dir " ] && rm -r " $indels_index_dir "
321+ exit 1
322+ fi
323+
324+ echo -e ' Indexing Indels\tEnd\t' $( date) >> " $log "
325+ else
326+ echo " Indels Index already present"
327+ fi
328+ # END STEP 2.3 - indels indexing
329+ fi
330+
331+ if [ -s $logerror ]; then
332+ printf " ERROR: Genome indexing failed" >&2
333+ exit 1
334334 fi
335335 # END STEP 2 - genome indexing
336336
@@ -346,9 +346,9 @@ while read vcf_f; do
346346 pids=() # reset process ids
347347 names=()
348348 # TODO: ricerca ref lanciata in parallelo con ricerca alternative
349- echo -e ' Off-targets search\Start \t' $( date) >> $log
349+ echo -e ' Off-targets search\tStart \t' $( date) >> $log
350350 if ! [ -f " $output_folder /crispritz_targets/${ref_name} _${pam_name} _${guide_name} _${mm} _${bDNA} _${bRNA} .targets.txt" ]; then
351- echo -e ' Search Reference\tStart\t ' $( date ) >> $log # off-targets search on reference genome
351+ echo -e ' Search Reference Start ' # off-targets search on reference genome
352352 if [ " $bDNA " -ne 0 ] || [ " $bRNA " -ne 0 ]; then # no bulges
353353 crispritz.py search $idx_ref " $pam_file " " $guide_file " " ${ref_name} _${pam_name} _${guide_name} _${mm} _${bDNA} _${bRNA} " -index -mm $mm -bDNA $bDNA -bRNA $bRNA -t -th $ceiling_result &
354354 pid_search_ref=$!
@@ -370,14 +370,15 @@ while read vcf_f; do
370370 exit 1
371371 fi
372372 fi
373+ echo -e ' Search Reference completed'
373374 else
374375 echo -e " Search for reference already done"
375376 fi
376377
377378 if [ " $vcf_name " != " _" ]; then
378379 # TODO: search in parallel on ref and alt
379380 if ! [ -f " $output_folder /crispritz_targets/${ref_name} +${vcf_name} _${pam_name} _${guide_name} _${mm} _${bDNA} _${bRNA} .targets.txt" ]; then
380- echo -e ' Search Variant\tStart\t ' $( date ) >> $log # search off-targets on alternative genomes (snps only)
381+ echo -e ' Search Variant Start ' # search off-targets on alternative genomes (snps only)
381382 if [ " $bDNA " -ne 0 ] || [ " $bRNA " -ne 0 ]; then # no bulge
382383 crispritz.py search " $idx_var " " $pam_file " " $guide_file " " ${ref_name} +${vcf_name} _${pam_name} _${guide_name} _${mm} _${bDNA} _${bRNA} " -index -mm $mm -bDNA $bDNA -bRNA $bRNA -t -th $ceiling_result -var &
383384 pid_search_var=$!
@@ -399,15 +400,14 @@ while read vcf_f; do
399400 rm -r $output_folder /* .targets.txt $output_folder /* profile* # delete results folder
400401 exit 1
401402 fi
402- echo -e ' Search Variant\tEnd\t' $( date) >> $log
403403 fi
404404 else
405405 echo -e " Search for variant already done"
406406 fi
407+ echo -e ' Search Variant completed'
407408
408409 if ! [ -f " $output_folder /crispritz_targets/indels_${ref_name} +${vcf_name} _${pam_name} _${guide_name} _${mm} _${bDNA} _${bRNA} .targets.txt" ]; then
409410 echo -e " Search INDELs Start"
410- echo -e ' Search INDELs\tStart\t' $( date) >> $log
411411 cd $starting_dir
412412 # TODO: REMOVE POOL SCRIPT FROM PROCESSING
413413 ./pool_search_indels.py " $ref_folder " " $vcf_folder " " $vcf_name " " $guide_file " " $pam_file " $bMax $mm $bDNA $bRNA " $output_folder " $true_pam " $current_working_directory /" " $ncpus "
@@ -418,8 +418,7 @@ while read vcf_f; do
418418 fi
419419 awk ' ($3 !~ "n") {print $0}' " $output_folder /indels_${ref_name} +${vcf_name} _${pam_name} _${guide_name} _${mm} _${bDNA} _${bRNA} .targets.txt" > " $output_folder /indels_${ref_name} +${vcf_name} _${pam_name} _${guide_name} _${mm} _${bDNA} _${bRNA} .targets.txt.tmp"
420420 mv " $output_folder /indels_${ref_name} +${vcf_name} _${pam_name} _${guide_name} _${mm} _${bDNA} _${bRNA} .targets.txt.tmp" " $output_folder /indels_${ref_name} +${vcf_name} _${pam_name} _${guide_name} _${mm} _${bDNA} _${bRNA} .targets.txt"
421- echo -e " Search INDELs End"
422- echo -e ' Search INDELs\tEnd\t' $( date) >> $log
421+ echo -e " Search INDELs completed"
423422 else
424423 echo -e " Search INDELs already done"
425424 fi
0 commit comments