Skip to content

Commit 21c7d04

Browse files
authored
Merge pull request seqan#3248 from eseiler/misc/refine_tags
[MISC] Refine unknown tag handling (HD/PG)
2 parents 41a17ad + b638661 commit 21c7d04

File tree

7 files changed

+94
-38
lines changed

7 files changed

+94
-38
lines changed

include/seqan3/io/sam_file/detail/format_sam_base.hpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -341,13 +341,18 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
341341
read_forward_range_field(string_buffer, value);
342342
};
343343

344-
auto print_cerr_of_unspported_tag = [&options](char const * const header_tag, std::array<char, 2> raw_tag)
344+
auto consume_unsupported_tag_and_print_warning =
345+
[&](char const * const header_tag, std::array<char, 2> const raw_tag)
345346
{
347+
// Not using `copy_next_tag_value_into_buffer` because we do not care whether the tag is valid.
348+
// E.g., `pb5.0.0` instead of `pb:5.0.0` would break the parsing if we used `copy_next_tag_value_into_buffer`.
349+
take_until_predicate(is_char<'\t'> || is_char<'\n'>);
350+
346351
if (options.stream_warnings_to == nullptr)
347352
return;
348353

349-
*options.stream_warnings_to << "Unsupported SAM header tag in @" << header_tag << ": " << raw_tag[0]
350-
<< raw_tag[1] << '\n';
354+
*options.stream_warnings_to << "Unsupported tag found in SAM header @" << header_tag << ": \"" << raw_tag[0]
355+
<< raw_tag[1] << string_buffer << "\"\n";
351356
};
352357

353358
while (it != end && is_char<'@'>(*it))
@@ -388,7 +393,7 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
388393
}
389394
default: // unsupported header tag
390395
{
391-
print_cerr_of_unspported_tag("HD", raw_tag);
396+
consume_unsupported_tag_and_print_warning("HD", raw_tag);
392397
}
393398
}
394399

@@ -397,8 +402,6 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
397402
copy_next_tag_value_into_buffer();
398403
read_forward_range_field(string_buffer, *header_entry);
399404
}
400-
else
401-
skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
402405
}
403406
++it; // skip newline
404407

@@ -562,7 +565,7 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
562565
}
563566
default: // unsupported header tag
564567
{
565-
print_cerr_of_unspported_tag("PG", raw_tag);
568+
consume_unsupported_tag_and_print_warning("PG", raw_tag);
566569
}
567570
}
568571

@@ -571,8 +574,6 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
571574
copy_next_tag_value_into_buffer();
572575
read_forward_range_field(string_buffer, *program_info_entry);
573576
}
574-
else
575-
skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
576577
}
577578
++it; // skip newline
578579

test/snippet/io/sam_file/sam_file_input_options.cpp

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ struct temporary_file
3131
}
3232
};
3333

34-
static constexpr auto sam_file_raw = R"(@HD VN:1.6 pb:5.0.0
34+
static constexpr auto sam_file_raw = R"(@HD VN:1.6 pb:5.0.0 ot:ter
3535
@SQ SN:ref LN:34
3636
)";
3737

@@ -43,16 +43,14 @@ static auto get_sam_file_input()
4343
void defaults_to_cerr()
4444
{
4545
auto fin = get_sam_file_input();
46-
std::cerr << "Written to cerr: ";
47-
auto it = fin.begin(); // Prints to cerr: "Unsupported SAM header tag in @HD: pb"
46+
auto it = fin.begin();
4847
}
4948

5049
void redirect_to_cout()
5150
{
5251
auto fin = get_sam_file_input();
5352
fin.options.stream_warnings_to = std::addressof(std::cout); // Equivalent to `= &std::cout;`
54-
std::cout << "Written to cout: ";
55-
auto it = fin.begin(); // Prints to cout: "Unsupported SAM header tag in @HD: pb"
53+
auto it = fin.begin();
5654
}
5755

5856
void redirect_to_file()
@@ -63,23 +61,54 @@ void redirect_to_file()
6361
{ // Inner scope to close file before reading
6462
std::ofstream warning_file{tmp_file.path};
6563
fin.options.stream_warnings_to = std::addressof(warning_file); // Equivalent to `= &warning_file;`
66-
auto it = fin.begin(); // Prints to file: "Unsupported SAM header tag in @HD: pb"
64+
auto it = fin.begin();
6765
}
6866

69-
std::cout << "Written to file: " << tmp_file.read_content();
67+
std::cout << "File content:\n" << tmp_file.read_content();
7068
}
7169

7270
void silence_warnings()
7371
{
7472
auto fin = get_sam_file_input();
7573
fin.options.stream_warnings_to = nullptr;
76-
auto it = fin.begin(); // No warning emitted
74+
auto it = fin.begin();
75+
}
76+
77+
void filter()
78+
{
79+
auto fin = get_sam_file_input();
80+
std::stringstream stream{};
81+
fin.options.stream_warnings_to = std::addressof(stream); // Equivalent to `= &stream;`
82+
auto it = fin.begin();
83+
84+
for (std::string line{}; std::getline(stream, line);)
85+
{
86+
// If "pb" is not found in the warning, print it to cerr.
87+
if (line.find("pb") == std::string::npos) // C++23: `!line.contains("pb")`
88+
std::cerr << line << '\n';
89+
}
90+
}
91+
92+
void print_section(std::string_view const section)
93+
{
94+
std::cout << "### " << section << " ###\n";
95+
std::cerr << "### " << section << " ###\n";
7796
}
7897

7998
int main()
8099
{
100+
print_section("defaults_to_cerr");
81101
defaults_to_cerr();
102+
103+
print_section("redirect_to_cout");
82104
redirect_to_cout();
105+
106+
print_section("redirect_to_file");
83107
redirect_to_file();
108+
109+
print_section("silence_warnings");
84110
silence_warnings();
111+
112+
print_section("filter");
113+
filter();
85114
}
Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,8 @@
1-
Written to cerr: Unsupported SAM header tag in @HD: pb
1+
### defaults_to_cerr ###
2+
Unsupported tag found in SAM header @HD: "pb:5.0.0"
3+
Unsupported tag found in SAM header @HD: "ot:ter"
4+
### redirect_to_cout ###
5+
### redirect_to_file ###
6+
### silence_warnings ###
7+
### filter ###
8+
Unsupported tag found in SAM header @HD: "ot:ter"
Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,10 @@
1-
Written to cout: Unsupported SAM header tag in @HD: pb
2-
Written to file: Unsupported SAM header tag in @HD: pb
1+
### defaults_to_cerr ###
2+
### redirect_to_cout ###
3+
Unsupported tag found in SAM header @HD: "pb:5.0.0"
4+
Unsupported tag found in SAM header @HD: "ot:ter"
5+
### redirect_to_file ###
6+
File content:
7+
Unsupported tag found in SAM header @HD: "pb:5.0.0"
8+
Unsupported tag found in SAM header @HD: "ot:ter"
9+
### silence_warnings ###
10+
### filter ###

test/unit/io/sam_file/format_bam_test.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,14 @@ struct sam_file_read<seqan3::format_bam> : public sam_file_data
3838
'\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'};
3939

4040
std::string unknown_tag_header{
41-
'\x42', '\x41', '\x4d', '\x01', '\x25', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56',
41+
'\x42', '\x41', '\x4d', '\x01', '\x4b', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56',
4242
'\x4e', '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e',
43-
'\x30', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09',
44-
'\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00',
45-
'\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'};
43+
'\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53',
44+
'\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x0a', '\x40',
45+
'\x50', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x6e', '\x6f', '\x76', '\x6f', '\x61', '\x6c', '\x69',
46+
'\x67', '\x6e', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f',
47+
'\x74', '\x74', '\x65', '\x72', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00',
48+
'\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'};
4649

4750
std::string big_header_input{
4851
'\x42', '\x41', '\x4D', '\x01', '\xB7', '\x01', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E',

test/unit/io/sam_file/format_sam_test.cpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,12 @@ struct sam_file_read<seqan3::format_sam> : public sam_file_data
2020
@SQ SN:ref LN:34
2121
)"};
2222

23+
// "otter" is not valid because a user-defined/local tag must have the format [TAG]:[VALUE].
24+
// However, encountering such a tag should not break the parsing.
2325
std::string unknown_tag_header{
24-
R"(@HD VN:1.6 pb:5.0.0
26+
R"(@HD VN:1.6 pb:5.0.0 otter
2527
@SQ SN:ref LN:34
28+
@PG ID:novoalign pb:5.0.0 otter
2629
)"};
2730

2831
std::string big_header_input{
@@ -176,16 +179,17 @@ TEST_F(sam_format, header_errors)
176179
"@SQ\tSN:ref2\tLN:243199373\tSB:user_tag\tLB:user_tag\tpb:user_tag\n"
177180
"@RG\tID:U0a_A2_L1\tIB:user_tag\tpb:user_tag\n"
178181
"@PG\tID:qc\tIB:user_tag\tPB:user_tag\tCB:user_tag\tDB:user_tag\tVB:user_tag\tpb:user_tag\n"};
179-
std::string expected_cerr{"Unsupported SAM header tag in @HD: VB\n"
180-
"Unsupported SAM header tag in @HD: SB\n"
181-
"Unsupported SAM header tag in @HD: GB\n"
182-
"Unsupported SAM header tag in @HD: pb\n"
183-
"Unsupported SAM header tag in @PG: IB\n"
184-
"Unsupported SAM header tag in @PG: PB\n"
185-
"Unsupported SAM header tag in @PG: CB\n"
186-
"Unsupported SAM header tag in @PG: DB\n"
187-
"Unsupported SAM header tag in @PG: VB\n"
188-
"Unsupported SAM header tag in @PG: pb\n"};
182+
std::string expected_cerr{"Unsupported tag found in SAM header @HD: \"VB:user_tag\"\n"
183+
"Unsupported tag found in SAM header @HD: \"SB:user_tag\"\n"
184+
"Unsupported tag found in SAM header @HD: \"GB:user_tag\"\n"
185+
"Unsupported tag found in SAM header @HD: \"pb:user_tag\"\n"
186+
"Unsupported tag found in SAM header @PG: \"IB:user_tag\"\n"
187+
"Unsupported tag found in SAM header @PG: \"PB:user_tag\"\n"
188+
"Unsupported tag found in SAM header @PG: \"CB:user_tag\"\n"
189+
"Unsupported tag found in SAM header @PG: \"DB:user_tag\"\n"
190+
"Unsupported tag found in SAM header @PG: \"VB:user_tag\"\n"
191+
"Unsupported tag found in SAM header @PG: \"pb:user_tag\"\n"};
192+
189193
std::istringstream istream(header_str);
190194
seqan3::sam_file_input fin{istream, seqan3::format_sam{}};
191195

test/unit/io/sam_file/sam_file_format_test_template.hpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,10 @@ TYPED_TEST_P(sam_file_read, issue2423)
359359

360360
TYPED_TEST_P(sam_file_read, unknown_header_tag)
361361
{
362+
constexpr std::string_view expected_warning = "Unsupported tag found in SAM header @HD: \"pb:5.0.0\"\n"
363+
"Unsupported tag found in SAM header @HD: \"otter\"\n"
364+
"Unsupported tag found in SAM header @PG: \"pb:5.0.0\"\n"
365+
"Unsupported tag found in SAM header @PG: \"otter\"\n";
362366
// Default: Warnings to cerr
363367
{
364368
typename TestFixture::stream_type istream{this->unknown_tag_header};
@@ -367,7 +371,7 @@ TYPED_TEST_P(sam_file_read, unknown_header_tag)
367371
testing::internal::CaptureStderr();
368372
EXPECT_NO_THROW(fin.begin());
369373
EXPECT_EQ(testing::internal::GetCapturedStdout(), "");
370-
EXPECT_EQ(testing::internal::GetCapturedStderr(), "Unsupported SAM header tag in @HD: pb\n");
374+
EXPECT_EQ(testing::internal::GetCapturedStderr(), expected_warning);
371375
}
372376
// Redirect to cout
373377
{
@@ -377,7 +381,7 @@ TYPED_TEST_P(sam_file_read, unknown_header_tag)
377381
testing::internal::CaptureStdout();
378382
testing::internal::CaptureStderr();
379383
EXPECT_NO_THROW(fin.begin());
380-
EXPECT_EQ(testing::internal::GetCapturedStdout(), "Unsupported SAM header tag in @HD: pb\n");
384+
EXPECT_EQ(testing::internal::GetCapturedStdout(), expected_warning);
381385
EXPECT_EQ(testing::internal::GetCapturedStderr(), "");
382386
}
383387
// Redirect to file
@@ -403,7 +407,7 @@ TYPED_TEST_P(sam_file_read, unknown_header_tag)
403407
std::ifstream warning_file{filename};
404408
ASSERT_TRUE(warning_file.good());
405409
std::string content{std::istreambuf_iterator<char>(warning_file), std::istreambuf_iterator<char>()};
406-
EXPECT_EQ(content, "Unsupported SAM header tag in @HD: pb\n");
410+
EXPECT_EQ(content, expected_warning);
407411
}
408412
// Silence
409413
{

0 commit comments

Comments
 (0)