@@ -64,7 +64,7 @@ parse_region_name(string region_name,
6464
6565static size_t
6666adjust_start_pos (const size_t orig_start, const string &chrom_name) {
67- static const double LINE_WIDTH = 50.0 ;
67+ static const double LINE_WIDTH = 50.0 ; // ADS: dangerous; often this is 80
6868 const size_t name_offset = chrom_name.length () + 2 ; // For the '>' and '\n';
6969 const size_t preceding_newlines =
7070 static_cast <size_t >(std::floor (orig_start / LINE_WIDTH));
@@ -76,92 +76,62 @@ static size_t
7676adjust_region_size (const size_t orig_start,
7777 const string &chrom_name, // ADS: remove this soon
7878 const size_t orig_size) {
79- static const double LINE_WIDTH = 50.0 ;
79+ static const double LINE_WIDTH = 50.0 ; // ADS: dangerous; often this is 80
8080 const size_t preceding_newlines_start =
8181 static_cast <size_t >(std::floor (orig_start / LINE_WIDTH));
8282 const size_t preceding_newlines_end =
8383 static_cast <size_t >(std::floor ((orig_start + orig_size) / LINE_WIDTH));
8484 return (orig_size + (preceding_newlines_end - preceding_newlines_start));
8585}
8686
87-
87+ template < class T >
8888void
89- extract_regions_chrom_fasta (const string &chrom_name,
90- const string &filename,
91- const vector<SimpleGenomicRegion > ®ions,
92- vector<string> &sequences) {
93-
94- std::ifstream in (filename. c_str () );
95- for (vector<SimpleGenomicRegion>::const_iterator i (regions. begin () );
96- i != regions. end (); ++i) {
97-
98- const size_t orig_start_pos = i-> get_start ();
99- const size_t orig_end_pos = i-> get_end ();
100- const size_t orig_region_size = orig_end_pos - orig_start_pos;
101-
102- const size_t start_pos = adjust_start_pos (orig_start_pos, chrom_name);
103- const size_t region_size = adjust_region_size (
104- orig_start_pos, chrom_name, orig_region_size);
89+ extract_regions_chrom_fasta_impl (const string &chrom_name,
90+ const string &filename,
91+ const vector<T > ®ions,
92+ vector<string> &sequences) {
93+
94+ std::ifstream in (filename);
95+ if (!in) throw runtime_error ( " failed to open file: " + filename );
96+
97+ for ( auto &i : regions) {
98+ const auto orig_start_pos = i. get_start ();
99+ const auto orig_end_pos = i. get_end ();
100+ const auto orig_region_size = orig_end_pos - orig_start_pos;
101+
102+ const auto start_pos = adjust_start_pos (orig_start_pos, chrom_name);
103+ const auto region_size =
104+ adjust_region_size ( orig_start_pos, chrom_name, orig_region_size);
105105 assert (start_pos >= 0 );
106106
107107 in.seekg (start_pos);
108- char buffer[region_size + 1 ];
109- buffer[region_size] = ' \0 ' ;
110- in.read (buffer, region_size);
111-
112- std::remove_if (buffer, buffer + region_size,
113- [](const char x) {return x == ' \n ' ;});
114- buffer[orig_region_size] = ' \0 ' ;
115-
116- sequences.push_back (buffer);
117- std::transform (sequences.back ().begin (), sequences.back ().end (),
118- sequences.back ().begin (), [](const char x) {return toupper (x);});
119- assert (i->get_width () == sequences.back ().length ());
108+ string buffer (region_size, ' \0 ' );
109+ in.read (buffer.data (), region_size);
110+
111+ buffer.erase (remove (begin (buffer), end (buffer), ' \n ' ));
112+ transform (cbegin (buffer), cend (buffer), begin (buffer),
113+ [](const char x) {return toupper (x);});
114+ sequences.push_back (move (buffer));
115+ assert (i.get_width () == sequences.back ().size ());
120116 }
121- in.close ();
122117}
123118
119+ void
120+ extract_regions_chrom_fasta (const string &chrom_name,
121+ const string &filename,
122+ const vector<SimpleGenomicRegion> ®ions,
123+ vector<string> &sequences) {
124+ extract_regions_chrom_fasta_impl (chrom_name, filename, regions, sequences);
125+ }
124126
125127void
126128extract_regions_chrom_fasta (const string &chrom_name,
127129 const string &filename,
128130 const vector<GenomicRegion> ®ions,
129131 vector<string> &sequences) {
130-
131- std::ifstream in (filename.c_str ());
132- for (vector<GenomicRegion>::const_iterator i (regions.begin ());
133- i != regions.end (); ++i) {
134-
135- const size_t orig_start_pos = i->get_start ();
136- const size_t orig_end_pos = i->get_end ();
137- const size_t orig_region_size = orig_end_pos - orig_start_pos;
138-
139- const size_t start_pos = adjust_start_pos (orig_start_pos, chrom_name);
140- const size_t region_size = adjust_region_size (
141- orig_start_pos, chrom_name, orig_region_size);
142- assert (start_pos >= 0 );
143-
144- in.seekg (start_pos);
145- char buffer[region_size + 1 ];
146- buffer[region_size] = ' \0 ' ;
147- in.read (buffer, region_size);
148-
149- std::remove_if (buffer, buffer + region_size,
150- [](const char x) {return x == ' \n ' ;});
151- buffer[orig_region_size] = ' \0 ' ;
152-
153- sequences.push_back (buffer);
154- std::transform (sequences.back ().begin (), sequences.back ().end (),
155- sequences.back ().begin (), [](const char x) {return toupper (x);});
156-
157- if (i->neg_strand ())
158- revcomp_inplace (sequences.back ());
159- assert (i->get_width () == sequences.back ().length ());
160- }
161- in.close ();
132+ extract_regions_chrom_fasta_impl (chrom_name, filename, regions, sequences);
162133}
163134
164-
165135void
166136extract_regions_fasta (const string &dirname,
167137 const vector<GenomicRegion> ®ions_in,
0 commit comments