Skip to content

Commit a6ddd48

Browse files
committed
Updated misplaced/wrong input handling
1 parent 0dd90d2 commit a6ddd48

File tree

4 files changed: 59 additions (+59) and 68 deletions (-68)

src/overlap.cpp

Lines changed: 35 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ Overlap::Overlap(uint64_t a_id, uint64_t b_id, double, uint32_t,
1818
: q_name_(), q_id_(a_id - 1), q_begin_(a_begin), q_end_(a_end),
1919
q_length_(a_length), t_name_(), t_id_(b_id - 1), t_begin_(b_begin),
2020
t_end_(b_end), t_length_(b_length), strand_(a_rc ^ b_rc), length_(),
21-
error_(), cigar_(), is_valid_(true), is_transmuted_(true),
21+
error_(), cigar_(), is_valid_(true), is_transmuted_(false),
2222
breaking_points_(), dual_breaking_points_() {
2323

2424
length_ = std::max(q_end_ - q_begin_, t_end_ - t_begin_);
@@ -127,46 +127,53 @@ bool transmuteId(const std::unordered_map<T, uint64_t>& t_to_id, const T& t,
127127
return true;
128128
}
129129

130-
void Overlap::transmute(const std::unordered_map<std::string, uint64_t>& name_to_id,
130+
void Overlap::transmute(const std::vector<std::unique_ptr<Sequence>>& sequences,
131+
const std::unordered_map<std::string, uint64_t>& name_to_id,
131132
const std::unordered_map<uint64_t, uint64_t>& id_to_id) {
132133

133-
if (!is_valid_) {
134-
fprintf(stderr, "[racon::Overlap::transmute] error: "
135-
"overlap is not valid!\n");
136-
exit(1);
137-
}
138-
139-
if (is_transmuted_) {
140-
return;
134+
if (!is_valid_ || is_transmuted_) {
135+
return;
141136
}
142137

143138
if (!q_name_.empty()) {
144139
if (!transmuteId(name_to_id, q_name_ + "q", q_id_)) {
145-
fprintf(stderr, "[racon::Overlap::transmute] error: "
146-
"missing sequence with name %s!\n", q_name_.c_str());
147-
exit(1);
148-
}
149-
} else {
150-
if (!transmuteId(id_to_id, q_id_ << 1 | 0, q_id_)) {
151-
fprintf(stderr, "[racon::Overlap::transmute] error: "
152-
"missing sequence with id %zu!\n", q_id_);
153-
exit(1);
140+
is_valid_ = false;
141+
return;
154142
}
143+
std::string().swap(q_name_);
144+
} else if (!transmuteId(id_to_id, q_id_ << 1 | 0, q_id_)) {
145+
is_valid_ = false;
146+
return;
147+
}
148+
149+
if (q_length_ != sequences[q_id_]->data().size()) {
150+
fprintf(stderr, "[racon::overlap::find_breaking_points] error: "
151+
"unequal lengths in sequence and overlap file for sequence %s!\n",
152+
sequences[q_id_]->name().c_str());
153+
exit(1);
155154
}
155+
156156
if (!t_name_.empty()) {
157157
if (!transmuteId(name_to_id, t_name_ + "t", t_id_)) {
158-
fprintf(stderr, "[racon::Overlap::transmute] error: "
159-
"missing target sequence with name %s!\n", t_name_.c_str());
160-
exit(1);
161-
}
162-
} else {
163-
if (!transmuteId(id_to_id, t_id_ << 1 | 1, t_id_)) {
164-
fprintf(stderr, "[racon::Overlap::transmute] error: "
165-
"missing sequence with id %zu!\n", t_id_);
166-
exit(1);
158+
is_valid_ = false;
159+
return;
167160
}
161+
std::string().swap(t_name_);
162+
} else if (!transmuteId(id_to_id, t_id_ << 1 | 1, t_id_)) {
163+
is_valid_ = false;
164+
return;
165+
}
166+
167+
if (t_length_ != 0 && t_length_ != sequences[t_id_]->data().size()) {
168+
fprintf(stderr, "[racon::overlap::find_breaking_points] error: "
169+
"unequal lengths in target and overlap file for target %s!\n",
170+
sequences[t_id_]->name().c_str());
171+
exit(1);
168172
}
169173

174+
// for SAM input
175+
t_length_ = sequences[t_id_]->data().size();
176+
170177
is_transmuted_ = true;
171178
}
172179

@@ -183,16 +190,6 @@ void Overlap::find_breaking_points(const std::vector<std::unique_ptr<Sequence>>&
183190
return;
184191
}
185192

186-
if (q_length_ != sequences[q_id_]->data().size() &&
187-
q_length_ != sequences[q_id_]->reverse_complement().size()) {
188-
189-
fprintf(stderr, "[racon::overlap::find_breaking_points] error: "
190-
"mismatched sequence lengths in sequence and overlap file!\n");
191-
exit(1);
192-
}
193-
194-
t_length_ = sequences[t_id_]->data().size();
195-
196193
if (cigar_.empty()) {
197194
// align overlaps with edlib
198195
const char* q = !strand_ ? &(sequences[q_id_]->data()[q_begin_]) :

src/overlap.hpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ class Overlap {
4949
return is_valid_;
5050
}
5151

52-
void transmute(const std::unordered_map<std::string, uint64_t>& name_to_id,
52+
void transmute(const std::vector<std::unique_ptr<Sequence>>& sequences,
53+
const std::unordered_map<std::string, uint64_t>& name_to_id,
5354
const std::unordered_map<uint64_t, uint64_t>& id_to_id);
5455

5556
uint32_t length() const {

src/polisher.cpp

Lines changed: 8 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -202,26 +202,19 @@ void Polisher::initialize() {
202202

203203
auto it = name_to_id.find(sequences_[i]->name() + "t");
204204
if (it != name_to_id.end()) {
205-
uint64_t j = it->second;
206-
if (j >= targets_size) {
207-
fprintf(stderr, "[racon::Polisher::initialize] error: "
208-
"duplicate sequence %s in file\n",
209-
sequences_[i]->name().c_str());
210-
exit(1);
211-
}
212-
if (sequences_[i]->data().size() != sequences_[j]->data().size() ||
213-
sequences_[i]->quality().size() != sequences_[j]->quality().size()) {
205+
if (sequences_[i]->data().size() != sequences_[it->second]->data().size() ||
206+
sequences_[i]->quality().size() != sequences_[it->second]->quality().size()) {
214207

215208
fprintf(stderr, "[racon::Polisher::initialize] error: "
216209
"duplicate sequence %s with unequal data\n",
217210
sequences_[i]->name().c_str());
218211
exit(1);
219212
}
220213

221-
name_to_id[sequences_[i]->name() + "q"] = j;
222-
id_to_id[sequences_size << 1 | 0] = j;
214+
name_to_id[sequences_[i]->name() + "q"] = it->second;
215+
id_to_id[sequences_size << 1 | 0] = it->second;
223216

224-
duplicate_sequences.insert(j);
217+
duplicate_sequences.insert(it->second);
225218
sequences_[i].reset();
226219
++n;
227220
} else {
@@ -286,11 +279,12 @@ void Polisher::initialize() {
286279

287280
uint64_t c = l;
288281
for (uint64_t i = l; i < overlaps.size(); ++i) {
282+
overlaps[i]->transmute(sequences_, name_to_id, id_to_id);
283+
289284
if (!overlaps[i]->is_valid()) {
290285
overlaps[i].reset();
291286
continue;
292287
}
293-
overlaps[i]->transmute(name_to_id, id_to_id);
294288

295289
while (overlaps[c] == nullptr) {
296290
++c;
@@ -323,8 +317,6 @@ void Polisher::initialize() {
323317
}
324318
}
325319

326-
fprintf(stderr, "[racon::Polisher::initialize] loaded batch of overlaps\n");
327-
328320
uint64_t n = shrinkToFit(overlaps, l);
329321
l = c - n;
330322

@@ -341,6 +333,7 @@ void Polisher::initialize() {
341333
"empty overlap set!\n");
342334
exit(1);
343335
}
336+
fprintf(stderr, "[racon::Polisher::initialize] loaded overlaps\n");
344337

345338
std::vector<std::future<void>> thread_futures;
346339
for (uint64_t i = 0; i < sequences_.size(); ++i) {

test/racon_test.cpp

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ class RaconPolishingTest: public ::testing::Test {
3131
uint32_t window_length, double quality_threshold, double error_threshold,
3232
int8_t match,int8_t mismatch, int8_t gap) {
3333

34-
polisher = createPolisher(sequences_path, overlaps_path, target_path,
34+
polisher = racon::createPolisher(sequences_path, overlaps_path, target_path,
3535
type, window_length, quality_threshold, error_threshold, match,
3636
mismatch, gap, 4);
3737
}
@@ -51,33 +51,33 @@ class RaconPolishingTest: public ::testing::Test {
5151
std::unique_ptr<racon::Polisher> polisher;
5252
};
5353

54-
TEST(RaconTest, PolisherTypeError) {
55-
EXPECT_DEATH((createPolisher("", "", "", static_cast<racon::PolisherType>(3),
54+
TEST(RaconInitializeTest, PolisherTypeError) {
55+
EXPECT_DEATH((racon::createPolisher("", "", "", static_cast<racon::PolisherType>(3),
5656
0, 0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: invalid polisher"
5757
" type!");
5858
}
5959

60-
TEST(RaconTest, WindowLengthError) {
61-
EXPECT_DEATH((createPolisher("", "", "", racon::PolisherType::kC, 0, 0, 0, 0,
62-
0, 0, 0)), ".racon::createPolisher. error: invalid window length!");
60+
TEST(RaconInitializeTest, WindowLengthError) {
61+
EXPECT_DEATH((racon::createPolisher("", "", "", racon::PolisherType::kC, 0,
62+
0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: invalid window length!");
6363
}
6464

65-
TEST(RaconTest, SequencesPathExtensionError) {
66-
EXPECT_DEATH((createPolisher("", "", "", racon::PolisherType::kC, 500, 0,
67-
0, 0, 0, 0, 0)), ".racon::createPolisher. error: file has unsupported "
65+
TEST(RaconInitializeTest, SequencesPathExtensionError) {
66+
EXPECT_DEATH((racon::createPolisher("", "", "", racon::PolisherType::kC, 500,
67+
0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: file has unsupported "
6868
"format extension .valid extensions: .fasta, .fasta.gz, .fa, .fa.gz, "
6969
".fastq, .fastq.gz, .fq, .fq.gz.!");
7070
}
7171

72-
TEST(RaconTest, OverlapsPathExtensionError) {
73-
EXPECT_DEATH((createPolisher(racon_test_data_path + "sample_reads.fastq.gz", "",
74-
"", racon::PolisherType::kC, 500, 0, 0, 0, 0, 0, 0)),
72+
TEST(RaconInitializeTest, OverlapsPathExtensionError) {
73+
EXPECT_DEATH((racon::createPolisher(racon_test_data_path + "sample_reads.fastq.gz",
74+
"", "", racon::PolisherType::kC, 500, 0, 0, 0, 0, 0, 0)),
7575
".racon::createPolisher. error: file has unsupported format extension "
7676
".valid extensions: .mhap, .mhap.gz, .paf, .paf.gz, .sam, .sam.gz.!");
7777
}
7878

79-
TEST(RaconTest, TargetPathExtensionError) {
80-
EXPECT_DEATH((createPolisher(racon_test_data_path + "sample_reads.fastq.gz",
79+
TEST(RaconInitializeTest, TargetPathExtensionError) {
80+
EXPECT_DEATH((racon::createPolisher(racon_test_data_path + "sample_reads.fastq.gz",
8181
racon_test_data_path + "sample_overlaps.paf.gz", "", racon::PolisherType::kC,
8282
500, 0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: file has "
8383
"unsupported format extension .valid extensions: .fasta, .fasta.gz, .fa,"

0 commit comments

Comments
 (0)