Skip to content

Commit 278adcd

Browse files
committed
Merge commit '259602fdc08ca9835dfc6871eb477c481b4d9b40' as 'src/smithlab_cpp'
2 parents ee9682b + 259602f commit 278adcd

26 files changed

+5144
-0
lines changed

src/smithlab_cpp/.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# no compiled object file
2+
*.[oa]
3+
*.so
4+
5+
# no temporary file
6+
*~

src/smithlab_cpp/FileIterator.cpp

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
* Copyright (C) 2009 University of Southern California and
3+
* Andrew D. Smith
4+
*
5+
* Authors: Andrew D. Smith
6+
*
7+
* This program is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
21+
#include "FileIterator.hpp"
22+
#include "GenomicRegion.hpp"
23+
#include "MappedRead.hpp"
24+
25+
#include <iostream>
26+
27+
using std::istream;
28+
using std::vector;
29+
using std::string;
30+
31+
/* THIS FUNCTION FILLS A BUFFER FOR GenomicRegion OBJECTS
32+
*/
33+
void
34+
fill_buffer(std::ifstream &in, const size_t buffer_start,
35+
vector<GenomicRegion> &buffer) {
36+
GenomicRegion tmp;
37+
size_t i = buffer_start;
38+
assert(buffer_start <= buffer.size());
39+
for (; i != buffer.size() && !in.eof(); ++i) {
40+
in >> tmp;
41+
buffer[i].swap(tmp);
42+
in.peek();
43+
}
44+
if (i < buffer.size())
45+
buffer.erase(buffer.begin() + i, buffer.end());
46+
}
47+
48+
/* Fills buffer[buffer_start, buffer.size()) with read sequences taken
 * from a FASTA-style stream in which every record is a name token
 * followed by a sequence token, then shrinks the buffer so that it
 * ends right after the last sequence actually read.
 *
 *   in            input stream positioned at the next record
 *   buffer_start  index of the first slot to overwrite; slots before
 *                 it are left untouched
 *   buffer        destination; its size on entry is the capacity
 *
 * DANGER: tokens are whitespace-delimited, so a read name containing
 * spaces breaks the name/sequence pairing.
 *
 * A slot is only kept when both the name and the sequence were
 * extracted. Looping on eof() alone appends a phantom empty read for
 * an empty file or for any file that ends with a newline, because
 * operator>> leaves the trailing whitespace unread and peek() then
 * does not hit EOF.
 */
void
fill_buffer(std::ifstream &in, const size_t buffer_start,
            std::vector<std::string> &buffer) {
  std::string name; // read name; parsed and discarded
  size_t i = buffer_start;
  while (i < buffer.size() && (in >> name >> buffer[i])) {
    ++i;
    // touch the stream so the EOF flag is raised as soon as the last
    // record has been consumed; FileIterator inspects that flag
    in.peek();
  }
  // drop the slots that were not filled (including a partly read one)
  if (i < buffer.size())
    buffer.erase(buffer.begin() + i, buffer.end());
}
67+
68+
69+
/* THIS FUNCTION FILLS A BUFFER FOR THE ACTUAL READS, REPRESENTED AS
70+
* RECORDS IN A FASTQ FILE, INCLUDING THE QUALITY SCORES
71+
*/
72+
void
73+
fill_buffer(std::ifstream &in, const size_t buffer_start,
74+
vector<FASTQRecord> &buffer) {
75+
string tmp, read_seq, scores_seq;
76+
size_t i = buffer_start;
77+
for (; i != buffer.size() && !in.eof(); ++i) {
78+
// the read name...
79+
in >> tmp; // DANGER: assumes that the name of the read has no
80+
// spaces in it!!
81+
// the read itself:
82+
in >> read_seq;
83+
in >> tmp;
84+
in >> scores_seq;
85+
buffer[i] = std::make_pair(read_seq, scores_seq);
86+
in.peek();
87+
}
88+
if (i < buffer.size())
89+
buffer.erase(buffer.begin() + i, buffer.end());
90+
}
91+
92+
93+
/* THIS FUNCTION FILLS A BUFFER FOR THE ACTUAL READS, REPRESENTED AS
94+
* RECORDS IN A FASTQ FILE, INCLUDING THE QUALITY SCORES
95+
*/
96+
void
97+
fill_buffer(std::ifstream &in, const size_t buffer_start,
98+
vector<MappedRead> &buffer) {
99+
size_t i = buffer_start;
100+
for (; i != buffer.size() && !in.eof(); ++i) {
101+
in >> buffer[i];
102+
in.peek();
103+
}
104+
if (i < buffer.size())
105+
buffer.erase(buffer.begin() + i, buffer.end());
106+
}

src/smithlab_cpp/FileIterator.hpp

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/*
2+
* Copyright (C) 2009 University of Southern California and
3+
* Andrew D. Smith
4+
*
5+
* Authors: Andrew D. Smith
6+
*
7+
* This program is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
21+
#ifndef FILE_ITERATOR_HPP
22+
#define FILE_ITERATOR_HPP
23+
24+
#include <algorithm>
#include <cassert>
#include <fstream>
#include <string>
#include <utility>
#include <vector>

#include "smithlab_utils.hpp"
29+
30+
/* FileIterator<T> reads records of type T from a file through an
 * in-memory buffer and exposes two positions into that buffer,
 * `first` and `last`, with the invariant first <= last. Whenever one
 * of them reaches the end of the buffer, refill_buffer() is called to
 * bring in more records.
 *
 * A matching overload
 *   fill_buffer(std::ifstream&, size_t, std::vector<T>&)
 * must exist for T.
 */
template <class T>
class FileIterator {
public:
  /* f: name of the input file; bs: initial number of buffer slots.
   * Throws SMITHLABException when the file cannot be opened.
   */
  FileIterator(const std::string f, const size_t bs);

  /* Advances `first` by one record, refilling at the buffer end. */
  void increment_first() {
    // same precondition increment_last() enforces: never step an
    // iterator that already sits at the end of the buffer
    assert(first < buffer.end());
    if (++first == buffer.end()) {
      assert(first <= last);
      refill_buffer();
    }
    assert(first <= buffer.end() && last <= buffer.end());
  }

  /* Advances `last` by one record, refilling at the buffer end. */
  void increment_last() {
    assert(last < buffer.end());
    if (++last == buffer.end()) {
      assert(first <= last);
      refill_buffer();
    }
    assert(first <= buffer.end());
    assert(last <= buffer.end());
  }

  /* Advances both positions: `last` first, then `first`. */
  void increment() {
    increment_last();
    increment_first();
  }

  typename std::vector<T>::const_iterator get_first() const {return first;}
  typename std::vector<T>::const_iterator get_last() const {return last;}

  /* A position is good while it refers to a buffered record or the
   * stream can still supply more. good() is used instead of !eof() so
   * that a stream which failed without reaching EOF (malformed input,
   * I/O error) is not reported as an endless source of records.
   */
  bool first_is_good() const {return (in.good() || first < buffer.end());}
  bool last_is_good() const {return (in.good() || last < buffer.end());}
  bool is_good() const {return first_is_good() && last_is_good();}

private:
  std::ifstream in;       // input file
  std::vector<T> buffer;  // records currently held in memory
  typename std::vector<T>::iterator first;  // start of the live window
  typename std::vector<T>::iterator last;   // end of the live window
  void refill_buffer();
};
67+
68+
/* fill_buffer overloads, one per record type usable with
 * FileIterator. Each takes the input stream, the index of the first
 * buffer slot to overwrite, and the buffer to fill.
 */

// reads stored as plain strings
void fill_buffer(std::ifstream &in, const size_t buffer_start,
                 std::vector<std::string> &buffer);

// GenomicRegion records
class GenomicRegion;
void fill_buffer(std::ifstream &in, const size_t buffer_start,
                 std::vector<GenomicRegion> &buffer);

// FASTQ records held as a pair of strings
typedef std::pair<std::string, std::string> FASTQRecord;
void fill_buffer(std::ifstream &in, const size_t buffer_start,
                 std::vector<FASTQRecord> &buffer);

// MappedRead records
struct MappedRead;
void fill_buffer(std::ifstream &in, const size_t buffer_start,
                 std::vector<MappedRead> &buffer);
86+
87+
/* THIS REFILL BUFFER IS USED WHEN INCREMENTS TO EITHER THE FIRST OR
88+
THE LAST CURRENTLY USED ELEMENTS IN THE BUFFER HIT THE END OF THE
89+
BUFFER. HOPEFULLY THE FIRST ONE WILL NOT HIT THE END BEFORE THE
90+
LAST: THE FIRST IS ALWAYS SUPPOSED TO BE LESS THAN OR EQUAL TO THE
91+
LAST.
92+
*/
93+
template <class T> void
94+
FileIterator<T>::refill_buffer() {
95+
assert(first <= last);
96+
const size_t diff = last - first;
97+
copy(first, last, buffer.begin());
98+
// Not sure if the code below actualy works or is the best way to
99+
// grow the buffer
100+
// assert(diff < buffer.size());
101+
if (diff == buffer.size()) {
102+
std::vector<T> newbuff(2*buffer.size());
103+
copy(buffer.begin(), buffer.end(), newbuff.begin());
104+
buffer.swap(newbuff);
105+
}
106+
first = buffer.begin();
107+
last = first + diff;
108+
fill_buffer(in, diff, buffer);
109+
}
110+
111+
template <class T>
112+
FileIterator<T>::FileIterator(const std::string f, const size_t bs) :
113+
buffer(std::vector<T>(bs)) {
114+
in.open(f.c_str());
115+
if (!in) throw SMITHLABException("cannot open input file " + f);
116+
fill_buffer(in, 0, buffer);
117+
first = buffer.begin();
118+
last = buffer.begin();
119+
}
120+
121+
#endif

0 commit comments

Comments
 (0)