Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions rsem-prepare-reference
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ my $bowtie2_path = "";
# STAR-related command-line options (populated by GetOptions).
my $star = 0; ## set by --star; when true, the STAR genomeGenerate command is built and run
my $star_path = ''; ## set by --star-path; prepended as-is to "STAR " when the command is built
my $star_sjdboverhang = 100; ## for STAR, passed as --sjdbOverhang
my $star_ramlimit = ''; ## for STAR, the RAM limit (in bytes) for building genome indices, passed as --limitGenomeGenerateRAM. Empty means "not given by the user"; the STAR branch then falls back to 31000000000 bytes.
my $star_genomeSAindexNbases = ''; ## for STAR, the length (in bases) of the SA pre-indexing string, passed as --genomeSAindexNbases. Empty means "not given by the user"; the STAR branch then falls back to 14.

# HISAT2-related command-line options (populated by GetOptions).
my $hisat2_hca = 0; ## set by --hisat2-hca
my $hisat2_path = ''; ## set by --hisat2-path
Expand All @@ -66,6 +68,8 @@ GetOptions("gtf=s" => \$gtfF,
"star" => \$star,
"star-path=s" => \$star_path,
"star-sjdboverhang=i" => \$star_sjdboverhang,
"star-ramlimit=i" => \$star_ramlimit,
"star-genomeSAindexNbases=i" => \$star_genomeSAindexNbases,
"hisat2-hca" =>\$hisat2_hca,
"hisat2-path=s" => \$hisat2_path,
"p|num-threads=i" => \$nthreads,
Expand Down Expand Up @@ -182,14 +186,26 @@ if ($bowtie2) {

if ($star) {
pod2usage(-msg => "Sorry, if you want RSEM run STAR for you, you must provide the genome sequence and associated GTF annotation.", -exitval => 2, -verbose => 2) if ($gtfF eq "");

# Ensure star_ramlimit is set to default if not provided by user
if (!defined($star_ramlimit) || $star_ramlimit eq '' || $star_ramlimit !~ /^\d+$/) {
$star_ramlimit = '31000000000';
}

# Ensure star_genomeSAindexNbases is set to default if not provided by user
if (!defined($star_genomeSAindexNbases) || $star_genomeSAindexNbases eq '' || $star_genomeSAindexNbases !~ /^\d+$/) {
$star_genomeSAindexNbases = '14';
}

my $out_star_genome_path = dirname($ARGV[1]);
$command = $star_path . "STAR " .
" --runThreadN $nthreads " .
" --runMode genomeGenerate " .
" --genomeDir $out_star_genome_path " .
" --genomeFastaFiles @list " .
" --sjdbGTFfile $gtfF " .
" --limitGenomeGenerateRAM $star_ramlimit " .
" --genomeSAindexNbases $star_genomeSAindexNbases " .
" --sjdbOverhang $star_sjdboverhang " .
" --outFileNamePrefix $ARGV[1]";
&runCommand($command);
Expand Down Expand Up @@ -357,10 +373,18 @@ Build STAR indices. (Default: off)

The path to STAR's executable. (Default: the path to STAR executable is assumed to be in user's PATH environment variable)

=item B<--star-ramlimit> <int>

Maximum available RAM (bytes) for STAR index generation. It will be passed as the --limitGenomeGenerateRAM option to STAR. (Default: 31000000000)

=item B<--star-sjdboverhang> <int>

Length of the genomic sequence around annotated junction. It is only used for STAR to build splice junctions database and not needed for Bowtie or Bowtie2. It will be passed as the --sjdbOverhang option to STAR. According to STAR's manual, its ideal value is max(ReadLength)-1, e.g. for 2x101 paired-end reads, the ideal value is 101-1=100. In most cases, the default value of 100 will work as well as the ideal value. (Default: 100)

=item B<--star-genomeSAindexNbases> <int>

Length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, the parameter must be scaled down to min(14, log2(GenomeLength)/2 - 1). It will be passed as the --genomeSAindexNbases option to STAR. (Default: 14)

=item B<--hisat2-hca>

Build HISAT2 indices on the transcriptome according to Human Cell Atlas (HCA) SMART-Seq2 pipeline. (Default: off)
Expand Down