Skip to content

Commit e5f16a3

Browse files
Merge branch 'master' of github.com:smithlabcode/smithlab_cpp
2 parents 54e2125 + 1ec1dda commit e5f16a3

15 files changed

+98
-176
lines changed

GenomicRegion.cpp

Lines changed: 4 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ unordered_map<chrom_id_type, string> SimpleGenomicRegion::fw_table_out;
3939

4040
chrom_id_type
4141
SimpleGenomicRegion::assign_chrom(const std::string &c) {
42-
unordered_map<string, chrom_id_type>::const_iterator
43-
chr_id(fw_table_in.find(c));
42+
auto chr_id(fw_table_in.find(c));
4443
if (chr_id == fw_table_in.end()) {
4544
const chrom_id_type r = fw_table_in.size();
4645
fw_table_in[c] = r;
@@ -54,38 +53,15 @@ SimpleGenomicRegion::assign_chrom(const std::string &c) {
5453

5554
string
5655
SimpleGenomicRegion::retrieve_chrom(chrom_id_type i) {
57-
unordered_map<chrom_id_type, string>::const_iterator chr_name(fw_table_out.find(i));
58-
assert(chr_name != fw_table_out.end());
56+
auto chr_name(fw_table_out.find(i));
57+
// assert(chr_name != fw_table_out.end());
5958
return chr_name->second;
6059
}
6160

6261

6362
SimpleGenomicRegion::SimpleGenomicRegion(const GenomicRegion &r) :
6463
chrom(assign_chrom(r.get_chrom())), start(r.get_start()), end(r.get_end()) {}
6564

66-
// SimpleGenomicRegion::SimpleGenomicRegion(string string_representation) {
67-
// vector<string> parts = smithlab::split_whitespace_quoted(string_representation);
68-
69-
// // make sure there is the minimal required info
70-
// if (parts.size() < 3)
71-
// throw runtime_error("Invalid string representation: " +
72-
// string_representation);
73-
// // set the chromosome name
74-
// chrom = assign_chrom(parts[0]);
75-
76-
// // set the start position
77-
// const int checkChromStart = atoi(parts[1].c_str());
78-
// if (checkChromStart < 0)
79-
// throw runtime_error("Invalid start: " + parts[1]);
80-
// else start = static_cast<size_t>(checkChromStart);
81-
82-
// // set the end position
83-
// const int checkChromEnd = atoi(parts[2].c_str());
84-
// if (checkChromEnd < 0)
85-
// throw runtime_error("Invalid end: " + parts[2]);
86-
// else end = static_cast<size_t>(checkChromEnd);
87-
// }
88-
8965
SimpleGenomicRegion::SimpleGenomicRegion(const char *s, const size_t len) {
9066
size_t i = 0;
9167

@@ -182,7 +158,7 @@ unordered_map<chrom_id_type, string> GenomicRegion::fw_table_out;
182158

183159
chrom_id_type
184160
GenomicRegion::assign_chrom(const std::string &c) {
185-
unordered_map<string, chrom_id_type>::const_iterator chr_id(fw_table_in.find(c));
161+
auto chr_id(fw_table_in.find(c));
186162
if (chr_id == fw_table_in.end()) {
187163
const chrom_id_type r = fw_table_in.size();
188164
fw_table_in[c] = r;
@@ -200,38 +176,6 @@ GenomicRegion::retrieve_chrom(chrom_id_type i) {
200176
}
201177

202178

203-
// GenomicRegion::GenomicRegion(string string_representation) : strand('+') {
204-
// vector<string> parts(smithlab::split_whitespace_quoted(string_representation));
205-
206-
// // make sure there is the minimal required info
207-
// if (parts.size() < 3)
208-
// throw runtime_error("Invalid string representation: " +
209-
// string_representation);
210-
// // set the chromosome name
211-
// chrom = assign_chrom(parts[0]);
212-
213-
// // set the start position
214-
// const int checkChromStart = atoi(parts[1].c_str());
215-
// if (checkChromStart < 0)
216-
// throw runtime_error("Invalid start: " + parts[1]);
217-
// else start = static_cast<size_t>(checkChromStart);
218-
219-
// // set the end position
220-
// const int checkChromEnd = atoi(parts[2].c_str());
221-
// if (checkChromEnd < 0)
222-
// throw runtime_error("Invalid end: " + parts[2]);
223-
// else end = static_cast<size_t>(checkChromEnd);
224-
225-
// if (parts.size() > 3)
226-
// name = parts[3];
227-
228-
// if (parts.size() > 4)
229-
// score = atof(parts[4].c_str());
230-
231-
// if (parts.size() > 5)
232-
// strand = parts[5][0];
233-
// }
234-
235179
GenomicRegion::GenomicRegion(const char *s, const size_t len) {
236180
size_t i = 0;
237181

@@ -300,33 +244,6 @@ GenomicRegion::tostring() const {
300244
return s.str();
301245
}
302246

303-
// std::ostream&
304-
// operator<<(std::ostream& s, const GenomicRegion& region) {
305-
// return s << region.tostring();
306-
// }
307-
308-
// std::istream&
309-
// operator>>(std::istream& s, GenomicRegion& region) {
310-
// string chrom, name;
311-
// size_t start = 0ul, end = 0ul;
312-
// double score = 0.0;
313-
// char strand = '\0';
314-
315-
// if (s >> chrom >> start >> end >> name >> score >> strand)
316-
// region = GenomicRegion(chrom, start, end, name, score, strand);
317-
// else region = GenomicRegion();
318-
319-
// char c;
320-
// while ((c = s.get()) != '\n' && s);
321-
322-
// if (c != '\n')
323-
// s.setstate(std::ios::badbit);
324-
325-
// if (s.eof())
326-
// s.setstate(std::ios::badbit);
327-
328-
// return s;
329-
// }
330247

331248
bool
332249
GenomicRegion::contains(const GenomicRegion& other) const {
@@ -438,8 +355,6 @@ separate_chromosomes(const vector<GenomicRegion>& regions,
438355
}
439356

440357

441-
442-
443358
static bool
444359
is_header_line(const string& line) {
445360
static const char *browser_label = "browser";

GenomicRegion.hpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,20 +39,12 @@ class GenomicRegion;
3939

4040
class SimpleGenomicRegion {
4141
public:
42-
// Cannonical
4342
SimpleGenomicRegion() : chrom(assign_chrom("(null)")), start(0), end(0) {}
4443
void swap(SimpleGenomicRegion &rhs) {
4544
std::swap(chrom, rhs.chrom);
4645
std::swap(start, rhs.start);
4746
std::swap(end, rhs.end);
4847
}
49-
// SimpleGenomicRegion &operator=(const SimpleGenomicRegion &rhs) {
50-
// SimpleGenomicRegion tmp(rhs);
51-
// swap(tmp);
52-
// return *this;
53-
// }
54-
// SimpleGenomicRegion(const SimpleGenomicRegion &rhs) :
55-
// chrom(rhs.chrom), start(rhs.start), end(rhs.end) {}
5648

5749
// Other constructors
5850
SimpleGenomicRegion(std::string c, size_t sta, size_t e) :
@@ -136,14 +128,6 @@ class GenomicRegion {
136128
std::swap(score, rhs.score);
137129
std::swap(strand, rhs.strand);
138130
}
139-
// GenomicRegion(const GenomicRegion &other) :
140-
// chrom(other.chrom), name(other.name), start(other.start), end(other.end),
141-
// score(other.score), strand(other.strand) {}
142-
// GenomicRegion &operator=(const GenomicRegion &rhs) {
143-
// GenomicRegion tmp(rhs);
144-
// swap(tmp);
145-
// return *this;
146-
// }
147131

148132
// Other constructors
149133
GenomicRegion(std::string c, size_t sta, size_t e,
@@ -432,5 +416,4 @@ assemble_region_name(const T &region, const std::string sep) {
432416
smithlab::toa(region.get_end()));
433417
}
434418

435-
436419
#endif

Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ EXTRA_DIST = README.md
1818

1919
ACLOCAL_AMFLAGS = -I m4
2020

21-
CXXFLAGS = -O3
21+
CXXFLAGS = -Wall -O3
2222

2323
lib_LIBRARIES = libsmithlab_cpp.a
2424

OptionParser.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
#include <cstring>
3333
#include <cctype>
3434
#include <functional>
35-
#include <regex>
3635
#include <iterator>
3736

3837
#include "smithlab_utils.hpp"
@@ -41,7 +40,6 @@ using std::vector;
4140
using std::string;
4241
using std::endl;
4342
using std::runtime_error;
44-
using std::regex;
4543
using std::begin;
4644
using std::end;
4745

@@ -346,13 +344,23 @@ bool valid_option_char(char ch) {
346344
return std::isalnum(ch) || ch == '_';
347345
}
348346

347+
static void
348+
fix_whitespace(string &s) {
349+
std::istringstream iss(s);
350+
string token;
351+
s.clear();
352+
while (iss >> token) {
353+
if (!s.empty())
354+
s += ' ';
355+
s += token;
356+
}
357+
}
358+
349359
static void
350360
read_config_file(const string &config_filename,
351361
vector<string> &config_file_options) {
352362
static const char comment_character = '#';
353363
static const char separator_character = ':';
354-
static const string outer_space = "^[:space:]+|[:space:]+$";
355-
static const string inner_space = "([:space:])[:space:]+";
356364

357365
config_file_options.clear();
358366

@@ -368,8 +376,7 @@ read_config_file(const string &config_filename,
368376
throw runtime_error("failed to config line from " + config_filename);
369377

370378
// remove leading and trailing space
371-
line = regex_replace(line, regex(outer_space), "");
372-
line = regex_replace(line, regex(inner_space), " ");
379+
fix_whitespace(line);
373380

374381
if (!line.empty() && line.front() != comment_character) {
375382

@@ -386,7 +393,7 @@ read_config_file(const string &config_filename,
386393

387394
string option_value(line.substr(sep_pos + 1));
388395
// remove leading space
389-
option_value = regex_replace(line, regex(outer_space), "");
396+
fix_whitespace(option_value);
390397

391398
if (!all_of(begin(option_value), end(option_value), valid_option_char))
392399
throw runtime_error("bad option label: " + line);

bisulfite_utils.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#include <string>
2424
#include <random>
25+
#include <cstdint>
2526

2627
#include "sam_record.hpp"
2728

dna_four_bit.hpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,24 +118,35 @@ struct genome_four_bit_itr {
118118
offset = (offset - 1) & 15ul;
119119
return *this;
120120
}
121+
121122
genome_four_bit_itr operator--(int) {
122123
genome_four_bit_itr tmp(*this);
123124
itr -= (offset == 0);
124125
offset = (offset - 1) & 15ul;
125126
return tmp;
126127
}
128+
127129
genome_four_bit_itr operator+(const size_t step) const {
128130
// whether the sum of offsets is >= 16
129131
const bool shift_one_pos =
130-
(((offset + (static_cast<int>(step) & 15)) & 16) >> 4);
132+
((offset + (static_cast<int>(step) & 15)) & 16) >> 4;
131133

132134
const int new_offset = (offset + step) & 15;
133-
return genome_four_bit_itr(itr + step/16 + shift_one_pos,
134-
new_offset);
135+
return genome_four_bit_itr(itr + step / 16 + shift_one_pos, new_offset);
135136
}
137+
136138
bool operator!=(const genome_four_bit_itr &rhs) const {
137139
return itr != rhs.itr || offset != rhs.offset;
138140
}
141+
142+
bool operator<(const genome_four_bit_itr &rhs) const {
143+
return itr < rhs.itr || (itr == rhs.itr && offset < rhs.offset);
144+
}
145+
146+
bool operator<=(const genome_four_bit_itr &rhs) const {
147+
return itr < rhs.itr || (itr == rhs.itr && offset <= rhs.offset);
148+
}
149+
139150
std::vector<size_t>::const_iterator itr;
140151
int offset;
141152
};

0 commit comments

Comments
 (0)