Skip to content

Commit 76476a4

Browse files
committed
version update for LRSDAY: v1.3.0 -> v1.3.1
1 parent ed48f2c commit 76476a4

23 files changed

+619
-207
lines changed

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
77

88
## [Unreleased]
99

10+
## [1.3.1] - 2019-01-22
11+
### Added
12+
- A script for generated demultiplexed fastq reads based on nanopore's guppy demultiplexing summary file.
13+
- A script for extracting all read IDs from the input fastq file.
14+
- A script for calculating the sequence length of each enclosed sequences from the input fasta file.
15+
### Changed
16+
- Software version updates for a number of dependencies. Importantly, nanopolish was bumped up to v0.11.0 to support the new multi-fast5 nanopore reads.
17+
- Downloading URL updates for testing data due to changes on the EBI data server.
18+
- Adopting picard tools' new commandline syntax.
19+
### Fixed
20+
- A bug that prevents correct parameter parsing when multiple customized canu parameters are specified.
21+
- Minor bugs related with deleting intermediate files in special cases.
22+
1023
## [1.3.0] - 2018-11-13
1124
### Added
1225
- Support for one more alternative assembler: wtdbg2.

Manual.pdf

-1.99 MB
Binary file not shown.

Project_Template/00.Long_Reads/LRSDAY.00.Long_Reads_Preprocessing.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ echo "genome_size=$genome_size, post_filtering_coverage=$post_filtering_coverage
2424
echo ""
2525
if [[ "$reads_type" == "nanopore-raw" || "$reads_type" == "nanopore-corrected" ]]
2626
then
27-
$porechop_dir/porechop -i $reads -o $prefix.porechop.fastq.gz --threads $threads > $prefix.porechop.summary.txt
27+
$porechop_dir/porechop -i $reads -o $prefix.porechop.fastq.gz --discard_middle --threads $threads > $prefix.porechop.summary.txt
2828
if [[ "$run_filtering" == "yes" ]]
2929
then
3030
$filtlong_dir/filtlong --min_length 1000 --keep_percent 90 --target_bases $filtlong_target_bases $prefix.porechop.fastq.gz | gzip > $prefix.filtlong.fastq.gz

Project_Template/00.Long_Reads/LRSDAY.00.Retrieve_Sample_PacBio_Reads.sh

Lines changed: 50 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,50 +24,77 @@ rm $file_name
2424
cd pacbio_fofn_files
2525
echo "download the metadata and raw PacBio reads in .h5 format ..."
2626

27-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.metadata.xml
28-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.metadata.xml
29-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA535/ERA535258/pacbio_hdf5/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.metadata.xml
30-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.metadata.xml
27+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080522/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.metadata.xml
28+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080529/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.metadata.xml
29+
# wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080536/m150814_201337_00127_c100823152550000001823177111031541_s1_p0.metadata.xml
30+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080537/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.metadata.xml
31+
# wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR112/ERR1124245/m150910_184604_00127_c100822732550000001823176011031536_s1_p0.metadata.xml
32+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR114/ERR1140978/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.metadata.xml
3133

3234
if [[ ! -d Analysis_Results ]]
3335
then
3436
mkdir Analysis_Results
3537
fi
3638

3739
cd Analysis_Results
38-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.1.bax.h5
39-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.2.bax.h5
40-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.3.bax.h5
41-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.bas.h5
42-
43-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.1.bax.h5
44-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.2.bax.h5
45-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.3.bax.h5
46-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.bas.h5
47-
48-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA535/ERA535258/pacbio_hdf5/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.1.bax.h5
49-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA535/ERA535258/pacbio_hdf5/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.2.bax.h5
50-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA535/ERA535258/pacbio_hdf5/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.3.bax.h5
51-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA535/ERA535258/pacbio_hdf5/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.bas.h5
52-
53-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.1.bax.h5
54-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.2.bax.h5
55-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.3.bax.h5
56-
wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA525/ERA525888/pacbio_hdf5/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.bas.h5
40+
41+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080522/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.1.bax.h5
42+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080522/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.2.bax.h5
43+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080522/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.3.bax.h5
44+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080522/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.bas.h5
45+
46+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080529/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.1.bax.h5
47+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080529/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.2.bax.h5
48+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080529/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.3.bax.h5
49+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080529/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.bas.h5
50+
51+
# wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080536/m150814_201337_00127_c100823152550000001823177111031541_s1_p0.1.bax.h5
52+
# wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080536/m150814_201337_00127_c100823152550000001823177111031541_s1_p0.2.bax.h5
53+
# wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080536/m150814_201337_00127_c100823152550000001823177111031541_s1_p0.3.bax.h5
54+
# wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080536/m150814_201337_00127_c100823152550000001823177111031541_s1_p0.bas.h5
55+
56+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080537/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.1.bax.h5
57+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080537/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.2.bax.h5
58+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080537/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.3.bax.h5
59+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR108/ERR1080537/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.bas.h5
60+
61+
# wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR112/ERR1124245/m150910_184604_00127_c100822732550000001823176011031536_s1_p0.1.bax.h5
62+
# wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR112/ERR1124245/m150910_184604_00127_c100822732550000001823176011031536_s1_p0.2.bax.h5
63+
# wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR112/ERR1124245/m150910_184604_00127_c100822732550000001823176011031536_s1_p0.3.bax.h5
64+
# wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR112/ERR1124245/m150910_184604_00127_c100822732550000001823176011031536_s1_p0.bas.h5
65+
66+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR114/ERR1140978/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.1.bax.h5
67+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR114/ERR1140978/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.2.bax.h5
68+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR114/ERR1140978/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.3.bax.h5
69+
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR114/ERR1140978/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.bas.h5
70+
71+
###
5772

5873
echo $(pwd)/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.1.bax.h5 >> ./../$prefix.SMRTCell.1.RSII_bax.fofn
5974
echo $(pwd)/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.2.bax.h5 >> ./../$prefix.SMRTCell.1.RSII_bax.fofn
6075
echo $(pwd)/m150811_092723_00127_c100844062550000001823187612311514_s1_p0.3.bax.h5 >> ./../$prefix.SMRTCell.1.RSII_bax.fofn
76+
6177
echo $(pwd)/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.1.bax.h5 >> ./../$prefix.SMRTCell.2.RSII_bax.fofn
6278
echo $(pwd)/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.2.bax.h5 >> ./../$prefix.SMRTCell.2.RSII_bax.fofn
6379
echo $(pwd)/m150814_233250_00127_c100823152550000001823177111031542_s1_p0.3.bax.h5 >> ./../$prefix.SMRTCell.2.RSII_bax.fofn
80+
6481
echo $(pwd)/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.1.bax.h5 >> ./../$prefix.SMRTCell.3.RSII_bax.fofn
6582
echo $(pwd)/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.2.bax.h5 >> ./../$prefix.SMRTCell.3.RSII_bax.fofn
6683
echo $(pwd)/m150911_220012_00127_c100861772550000001823190702121671_s1_p0.3.bax.h5 >> ./../$prefix.SMRTCell.3.RSII_bax.fofn
84+
6785
echo $(pwd)/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.1.bax.h5 >> ./../$prefix.SMRTCell.4.RSII_bax.fofn
6886
echo $(pwd)/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.2.bax.h5 >> ./../$prefix.SMRTCell.4.RSII_bax.fofn
6987
echo $(pwd)/m150813_110541_00127_c100823112550000001823177111031581_s1_p0.3.bax.h5 >> ./../$prefix.SMRTCell.4.RSII_bax.fofn
7088

89+
# echo $(pwd)/m150814_201337_00127_c100823152550000001823177111031541_s1_p0.1.bax.h5 >> ./../$prefix.SMRTCell.5.RSII_bax.fofn
90+
# echo $(pwd)/m150814_201337_00127_c100823152550000001823177111031541_s1_p0.2.bax.h5 >> ./../$prefix.SMRTCell.5.RSII_bax.fofn
91+
# echo $(pwd)/m150814_201337_00127_c100823152550000001823177111031541_s1_p0.3.bax.h5 >> ./../$prefix.SMRTCell.5.RSII_bax.fofn
92+
93+
# echo $(pwd)/m150910_184604_00127_c100822732550000001823176011031536_s1_p0.1.bax.h5 >> ./../$prefix.SMRTCell.6.RSII_bax.fofn
94+
# echo $(pwd)/m150910_184604_00127_c100822732550000001823176011031536_s1_p0.2.bax.h5 >> ./../$prefix.SMRTCell.6.RSII_bax.fofn
95+
# echo $(pwd)/m150910_184604_00127_c100822732550000001823176011031536_s1_p0.3.bax.h5 >> ./../$prefix.SMRTCell.6.RSII_bax.fofn
96+
97+
7198
cd ..
7299
cd ..
73100

Project_Template/00.Long_Reads/nanopore_fast5_files/.gitkeep

Whitespace-only changes.

Project_Template/00.Long_Reads/pacbio_fofn_files/.gitkeep

Whitespace-only changes.

Project_Template/00.Long_Reads/pacbio_fofn_files/Analysis_Results/.gitkeep

Whitespace-only changes.

Project_Template/01.Long-read-based_Genome_Assembly/LRSDAY.01.Long-read-based_Genome_Assembly.sh

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ long_reads="./../00.Long_Reads/SK1.filtered_subreads.fastq.gz" # The file path o
1212
long_reads_type="pacbio-raw" # The long reads data type. Use "pacbio-raw" or "pacbio-corrected" or "nanopore-raw" or "nanopore-corrected". Default = "pacbio-raw" for the testing example
1313
genome_size="12.5m" # The estimated genome size with the format of <number>[g|m|k], e.g. 12.5m for 12.5 Mb. Default = "12.5m".
1414
assembler="canu" # The long-read assembler: Use "canu" or "flye" or "wtdbg2" or "smartdenovo" or "canu-flye" or "canu-wtdbg2" or "canu-smartdenovo". For "canu-flye", "canu-wtdbg2", and "canu-smartdenovo", the assembler canu is used first to generate error-corrected reads from the raw reads and then the assembler flye/wtdbg2/smartdenovo is used to assemble the genome. Based on our test, assembler="canu" generally gives the best result but will take substantially longer time than the other options.
15-
customized_canu_parameters="-correctedErrorRate=0.04" # For assembler="canu" only. Users can set customized Canu assembly parameters here or simply leave it empty like "" to use Canu's default assembly parameter. For example you could set "-correctedErrorRate=0.04" for high coverage (>60X) PacBio data and "-overlapper=mhap -utgReAlign=true" for high coverage (>60X) Nanopore data to improve the assembly speed. More than one customized parameters can be set here as long as they are separeted by space (e.g. "-option1=XXX -option2=YYY -option3=ZZZ"). Please consult Canu's manual "http://canu.readthedocs.io/en/latest/faq.html#what-parameters-can-i-tweak" for advanced customization settings. Default = "-correctedErrorRate=0.04" for the testing example.
16-
threads=1 # The number of threads to use. Default = 1.
15+
customized_canu_parameters="correctedErrorRate=0.04" # For assembler="canu" only. Users can set customized Canu assembly parameters here or simply leave it empty like customized_canu_parameters="" to use Canu's default assembly parameter. For example you could set customized_canu_parameters="correctedErrorRate=0.04" for high coverage (>60X) PacBio data and customized_canu_parameters="overlapper=mhap;utgReAlign=true" for high coverage (>60X) Nanopore data to improve the assembly speed. When assembling genomes with high heterozygosity, you can could set customized_canu_parameters="corOutCoverage=200;batOptions=-dg 3 -db 3 -dr 1 -ca 500 -cp 50" to avoid collasping haplotypes. As shown in these examples, more than one customized parameters can be set here as long as they are separeted by a semicolon and contained in a pair of double quotes (e.g. customized_canu_parameters="option1=XXX;option2=YYY;option3=ZZZ"). Please consult Canu's manual "http://canu.readthedocs.io/en/latest/faq.html#what-parameters-can-i-tweak" for advanced customization settings. Default = "correctedErrorRate=0.04" for the testing example.
16+
threads=2 # The number of threads to use. Default = 2.
1717
vcf="yes" # Use "yes" if prefer to have vcf file generated to show SNP and INDEL differences between the assembled genome and the reference genome for their uniquely alignable regions. Otherwise use "no". Default = "yes".
1818
dotplot="yes" # Use "yes" if prefer to plot genome-wide dotplot based on the comparison with the reference genome below. Otherwise use "no". Default = "yes".
1919
ref_genome_raw="./../00.Ref_Genome/S288C.ASM205763v1.fa" # The file path of the raw reference genome. This is only needed when the option "dotplot=" or "vcf=" has been set as "yes".
@@ -45,20 +45,23 @@ then
4545
fi
4646
fi
4747

48-
49-
5048
out_dir=${prefix}_${assembler}_out
5149

5250
if [[ "$assembler" == "canu" ]]
5351
then
52+
OLDIFS=$IFS;
53+
IFS=";"
54+
customized_canu_parameters_array=($customized_canu_parameters)
55+
IFS=$OLDIFS;
56+
printf "%s\n" "${customized_canu_parameters_array[@]}" > $prefix.customized_canu_parameters.spec
5457
$canu_dir/canu -p $prefix -d $out_dir \
58+
-s $prefix.customized_canu_parameters.spec \
5559
useGrid=false \
5660
maxThreads=$threads \
5761
genomeSize=$genome_size \
5862
gnuplot=$gnuplot_dir/gnuplot \
59-
-${long_reads_type} $long_reads \
60-
$customized_canu_parameters
61-
63+
-${long_reads_type} $long_reads
64+
mv $prefix.customized_canu_parameters.spec ./$out_dir/
6265
perl $LRSDAY_HOME/scripts/simplify_seq_name.pl -i $out_dir/$prefix.contigs.fasta -o $prefix.assembly.$assembler.fa
6366
elif [[ "$assembler" == "flye" ]]
6467
then
@@ -104,7 +107,6 @@ then
104107
genomeSize=$genome_size \
105108
gnuplot=$gnuplot_dir/gnuplot \
106109
-${long_reads_type} $long_reads \
107-
# $customized_canu_parameters
108110

109111
if [[ "$long_reads_type" == "pacbio-raw" ]]
110112
then
@@ -133,7 +135,6 @@ then
133135
genomeSize=$genome_size \
134136
gnuplot=$gnuplot_dir/gnuplot \
135137
-${long_reads_type} $long_reads \
136-
# $customized_canu_parameters
137138

138139
mkdir -p $out_dir/wtdbg2
139140
cd $out_dir/wtdbg2
@@ -149,7 +150,6 @@ then
149150
genomeSize=$genome_size \
150151
gnuplot=$gnuplot_dir/gnuplot \
151152
-${long_reads_type} $long_reads \
152-
# $customized_canu_parameters
153153

154154
mkdir -p $out_dir/smartdenovo
155155
cd $out_dir/smartdenovo
@@ -195,7 +195,6 @@ then
195195
rm *.delta
196196
rm *.delta_filter
197197
rm ref_genome.fa
198-
rm ref_genome.fa.fai
199198
if [[ $vcf == "yes" ]]
200199
then
201200
rm *.filter.coords

Project_Template/02.Long-read-based_Assembly_Polishing/LRSDAY.02.Long-read-based_Assembly_Polishing.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ pacbio_reads_type="RSII" # The sequencing machine used to generate the input Pac
1717

1818
### When long_read_technology="nanopore" ###
1919
nanopore_fast5_files="./../00.Long_Reads/nanopore_fast5_files" # The file path to the directory containing raw Oxford Nanopore FAST5 files.
20-
nanopore_albacore_sequencing_summary="./../00.Long_Reads/nanopore_fast5_files/sequencing_summary.txt" # The file path to the albacore basecaller sequencing summary output. This summary file is not necessary but it can help the polishing step to run much faster when available. When this file is unavailable, set nanopore_albacore_sequencing_summary="".
20+
nanopore_basecalling_sequencing_summary="./../00.Long_Reads/nanopore_fast5_files/sequencing_summary.txt" # The file path to the nanopore albacore/guppy basecaller sequencing summary output. This summary file is not necessary but it can help the polishing step to run much faster when available. When this file is unavailable, set nanopore_albacore_sequencing_summary="".
2121

2222
prefix="SK1" # The file name prefix for the output files. Default = "SK1" for the testing example.
2323

@@ -46,7 +46,7 @@ then
4646
$conda_pacbio_dir/variantCaller --algorithm=quiver -x 5 -X 120 -q 20 -v -j $threads $prefix.pbalign.bam -r $prefix.assembly.raw.fa -o $prefix.assembly.consensus.fa -o $prefix.assembly.consensus.fq -o $prefix.assembly.consensus.vcf --diploid
4747
fi
4848
else
49-
$conda_pacbio_dir/pbalign --nproc $threads --algorithm blasr --tmpDir ./tmp $pacbio_fofn_file $prefix.assembly.raw.fa $prefix.pbalign.bam
49+
$conda_pacbio_dir/pbalign --nproc $threads --algorithm blasr --tmpDir ./tmp $pacbio_bam_fofn_file $prefix.assembly.raw.fa $prefix.pbalign.bam
5050
if [[ $ploidy == "1" ]]
5151
then
5252
$conda_pacbio_dir/variantCaller --algorithm=arrow -x 5 -X 120 -q 20 -v -j $threads $prefix.pbalign.bam -r $prefix.assembly.raw.fa -o $prefix.assembly.consensus.fa -o $prefix.assembly.consensus.fq -o $prefix.assembly.consensus.vcf
@@ -61,11 +61,11 @@ then
6161
else
6262
# perform correction using the minimap2-nanopolish pipeline
6363
source $nanopolish_dir/py3_virtualenv_nanopolish/bin/activate
64-
if [[ -z "$nanopore_albacore_sequencing_summary" ]]
64+
if [[ -z "$nanopore_basecalling_sequencing_summary" ]]
6565
then
6666
$nanopolish_dir/nanopolish index -d $nanopore_fast5_files $long_reads_in_fastq
6767
else
68-
$nanopolish_dir/nanopolish index -d $nanopore_fast5_files -s $nanopore_albacore_sequencing_summary $long_reads_in_fastq
68+
$nanopolish_dir/nanopolish index -d $nanopore_fast5_files -s $nanopore_basecalling_sequencing_summary $long_reads_in_fastq
6969
fi
7070
java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $picard_dir/picard.jar CreateSequenceDictionary REFERENCE=$prefix.assembly.raw.fa OUTPUT=$prefix.assembly.raw.dict
7171
$minimap2_dir/minimap2 -ax map-ont $prefix.assembly.raw.fa $long_reads_in_fastq > $prefix.minimap2.sam

0 commit comments

Comments
 (0)