2525using std::vector;
2626using std::string;
2727using std::unordered_map;
28+ using std::runtime_error;
2829
2930static const char *digits = " 0987654321" ;
3031static const char *whitespace = " \t " ;
@@ -33,74 +34,74 @@ static const char *whitespace = " \t";
3334// thing chrom?
3435
3536void
36- parse_region_name (string region_name,
37- string& chrom, size_t &start, size_t &end) {
38-
37+ parse_region_name (string region_name,
38+ string& chrom, size_t &start, size_t &end) {
39+
3940 const size_t colon_offset = region_name.find (" :" );
40-
41+
4142 // get the chromosome
4243 size_t chr_offset = region_name.find_last_of (whitespace, colon_offset);
4344 if (chr_offset == string::npos)
4445 chr_offset = 0 ;
4546 else
4647 chr_offset += 1 ;
4748 chrom = region_name.substr (chr_offset, colon_offset - chr_offset);
48-
49+
4950 // get the start
5051 const size_t start_end = region_name.find (" -" , colon_offset + 1 );
5152 const string start_string = region_name.substr (colon_offset + 1 ,
52- start_end - colon_offset + 1 );
53+ start_end - colon_offset + 1 );
5354 start = static_cast <size_t >(atoi (start_string.c_str ()));
54-
55+
5556 // get the end
5657 const size_t end_end =
5758 region_name.find_first_not_of (digits, start_end + 1 );
5859 const string end_string = region_name.substr (start_end + 1 ,
59- end_end - start_end - 1 );
60+ end_end - start_end - 1 );
6061 end = static_cast <size_t >(atoi (end_string.c_str ()));
6162}
6263
6364
/**
 * Translate a 0-based position within a chromosome sequence into an
 * offset that also counts the name line and the newline characters
 * that precede the position.
 *
 * Assumes a name line made of '>' + chrom_name + '\n' followed by
 * sequence lines of exactly LINE_WIDTH characters each -- files laid
 * out differently will yield wrong offsets.
 *
 * @param orig_start  0-based position within the sequence
 * @param chrom_name  chromosome name as written on the name line
 * @return            orig_start plus the name-line length plus one
 *                    for every complete sequence line before it
 */
static size_t
adjust_start_pos(const size_t orig_start, const string &chrom_name) {
  static const size_t LINE_WIDTH = 50;
  const size_t name_offset = chrom_name.length() + 2; // For the '>' and '\n';
  // Integer division already rounds down and stays exact for every
  // size_t value; going through double loses precision above 2^53.
  const size_t preceding_newlines = orig_start/LINE_WIDTH;
  return orig_start + preceding_newlines + name_offset;
}
7273
7374
/**
 * Compute how many characters must be read to cover a region of
 * orig_size sequence characters starting at orig_start, counting the
 * newline characters that fall inside the region.
 *
 * Assumes sequence lines of exactly LINE_WIDTH characters each.
 *
 * @param orig_start  0-based start of the region within the sequence
 * @param chrom_name  not used by the computation; kept so the
 *                    signature stays the same for existing callers
 * @param orig_size   number of sequence characters in the region
 * @return            orig_size plus the number of line breaks between
 *                    orig_start and orig_start + orig_size
 */
static size_t
adjust_region_size(const size_t orig_start,
                   const string &chrom_name,
                   const size_t orig_size) {
  static const size_t LINE_WIDTH = 50;
  // Integer division already rounds down and stays exact for every
  // size_t value; going through double loses precision above 2^53.
  const size_t preceding_newlines_start = orig_start/LINE_WIDTH;
  const size_t preceding_newlines_end =
    (orig_start + orig_size)/LINE_WIDTH;
  return (orig_size + (preceding_newlines_end - preceding_newlines_start));
}
8586
8687
87- void
88+ void
8889extract_regions_chrom_fasta (const string &chrom_name,
89- const string &filename,
90- const vector<SimpleGenomicRegion> ®ions,
91- vector<string> &sequences) {
92-
90+ const string &filename,
91+ const vector<SimpleGenomicRegion> ®ions,
92+ vector<string> &sequences) {
93+
9394 std::ifstream in (filename.c_str ());
9495 for (vector<SimpleGenomicRegion>::const_iterator i (regions.begin ());
9596 i != regions.end (); ++i) {
96-
97+
9798 const size_t orig_start_pos = i->get_start ();
9899 const size_t orig_end_pos = i->get_end ();
99100 const size_t orig_region_size = orig_end_pos - orig_start_pos;
100101
101102 const size_t start_pos = adjust_start_pos (orig_start_pos, chrom_name);
102103 const size_t region_size = adjust_region_size (
103- orig_start_pos, chrom_name, orig_region_size);
104+ orig_start_pos, chrom_name, orig_region_size);
104105 assert (start_pos >= 0 );
105106
106107 in.seekg (start_pos);
@@ -109,12 +110,12 @@ extract_regions_chrom_fasta(const string &chrom_name,
109110 in.read (buffer, region_size);
110111
111112 std::remove_if (buffer, buffer + region_size,
112- std::bind2nd (std::equal_to<char >(), ' \n ' ));
113+ std::bind2nd (std::equal_to<char >(), ' \n ' ));
113114 buffer[orig_region_size] = ' \0 ' ;
114-
115+
115116 sequences.push_back (buffer);
116117 std::transform (sequences.back ().begin (), sequences.back ().end (),
117- sequences.back ().begin (), std::ptr_fun (&toupper));
118+ sequences.back ().begin (), std::ptr_fun (&toupper));
118119 assert (i->get_width () == sequences.back ().length ());
119120 }
120121 in.close ();
@@ -123,21 +124,21 @@ extract_regions_chrom_fasta(const string &chrom_name,
123124
124125void
125126extract_regions_chrom_fasta (const string &chrom_name,
126- const string &filename,
127- const vector<GenomicRegion> ®ions,
128- vector<string> &sequences) {
129-
127+ const string &filename,
128+ const vector<GenomicRegion> ®ions,
129+ vector<string> &sequences) {
130+
130131 std::ifstream in (filename.c_str ());
131132 for (vector<GenomicRegion>::const_iterator i (regions.begin ());
132133 i != regions.end (); ++i) {
133-
134+
134135 const size_t orig_start_pos = i->get_start ();
135136 const size_t orig_end_pos = i->get_end ();
136137 const size_t orig_region_size = orig_end_pos - orig_start_pos;
137138
138139 const size_t start_pos = adjust_start_pos (orig_start_pos, chrom_name);
139140 const size_t region_size = adjust_region_size (
140- orig_start_pos, chrom_name, orig_region_size);
141+ orig_start_pos, chrom_name, orig_region_size);
141142 assert (start_pos >= 0 );
142143
143144 in.seekg (start_pos);
@@ -146,14 +147,14 @@ extract_regions_chrom_fasta(const string &chrom_name,
146147 in.read (buffer, region_size);
147148
148149 std::remove_if (
149- buffer, buffer + region_size,
150- std::bind2nd (std::equal_to<char >(), ' \n ' ));
150+ buffer, buffer + region_size,
151+ std::bind2nd (std::equal_to<char >(), ' \n ' ));
151152 buffer[orig_region_size] = ' \0 ' ;
152153
153154 sequences.push_back (buffer);
154155 std::transform (
155- sequences.back ().begin (), sequences.back ().end (),
156- sequences.back ().begin (), std::ptr_fun (&toupper));
156+ sequences.back ().begin (), sequences.back ().end (),
157+ sequences.back ().begin (), std::ptr_fun (&toupper));
157158 if (i->neg_strand ())
158159 revcomp_inplace (sequences.back ());
159160 assert (i->get_width () == sequences.back ().length ());
@@ -164,12 +165,12 @@ extract_regions_chrom_fasta(const string &chrom_name,
164165
165166void
166167extract_regions_fasta (const string &dirname,
167- const vector<GenomicRegion> ®ions_in,
168- vector<string> &sequences) {
169-
168+ const vector<GenomicRegion> ®ions_in,
169+ vector<string> &sequences) {
170+
170171 static const string FASTA_SUFFIX (" .fa" );
171172 assert (check_sorted (regions_in));
172-
173+
173174 vector<string> filenames;
174175 read_dir (dirname, filenames);
175176
@@ -187,15 +188,15 @@ extract_regions_fasta(const string &dirname,
187188 std::unordered_map<string, size_t >::const_iterator f_idx =
188189 chrom_regions_map.find (chrom_file);
189190 if (f_idx == chrom_regions_map.end ())
190- throw SMITHLABException (" chrom not found:\t " + chrom_file);
191+ throw runtime_error (" chrom not found:\t " + chrom_file);
191192 extract_regions_chrom_fasta (
192- chrom_name, filenames[f_idx->second ], regions[i], sequences);
193+ chrom_name, filenames[f_idx->second ], regions[i], sequences);
193194 }
194195}
195196
196197
197198void extract_regions_fasta (const string &dirname,
198- const vector<SimpleGenomicRegion> ®ions_in, vector<string> &sequences) {
199+ const vector<SimpleGenomicRegion> ®ions_in, vector<string> &sequences) {
199200
200201 static const string FASTA_SUFFIX (" .fa" );
201202 assert (check_sorted (regions_in));
@@ -217,16 +218,16 @@ void extract_regions_fasta(const string &dirname,
217218 std::unordered_map<string, size_t >::const_iterator f_idx =
218219 chrom_regions_map.find (chrom_file);
219220 if (f_idx == chrom_regions_map.end ())
220- throw SMITHLABException (" chrom not found:\t " + chrom_file);
221+ throw runtime_error (" chrom not found:\t " + chrom_file);
221222 extract_regions_chrom_fasta (
222- chrom_name, filenames[f_idx->second ], regions[i], sequences);
223+ chrom_name, filenames[f_idx->second ], regions[i], sequences);
223224 }
224225}
225226
226227
227228void
228- identify_chromosomes (const string chrom_file, const string fasta_suffix,
229- unordered_map<string, string> &chrom_files) {
229+ identify_chromosomes (const string chrom_file, const string fasta_suffix,
230+ unordered_map<string, string> &chrom_files) {
230231 vector<string> the_files;
231232 if (isdir (chrom_file.c_str ())) {
232233 read_dir (chrom_file, fasta_suffix, the_files);
@@ -238,9 +239,9 @@ identify_chromosomes(const string chrom_file, const string fasta_suffix,
238239
239240
240241void
241- identify_and_read_chromosomes (const string chrom_file,
242- const string fasta_suffix,
243- unordered_map<string, string> &chrom_files) {
242+ identify_and_read_chromosomes (const string chrom_file,
243+ const string fasta_suffix,
244+ unordered_map<string, string> &chrom_files) {
244245 vector<string> the_files;
245246 if (isdir (chrom_file.c_str ())) {
246247 read_dir (chrom_file, fasta_suffix, the_files);
0 commit comments