1818
1919#include " counts_header.hpp"
2020
21- #include < iostream>
22- #include < fstream>
23- #include < string>
24- #include < vector>
2521#include < cassert>
2622#include < cstdint>
23+ #include < fstream>
24+ #include < iostream>
2725#include < sstream>
26+ #include < string>
27+ #include < vector>
2828
2929#include " bam_record_utils.hpp"
3030
3434#include " bamxx.hpp"
3535#include " dnmt_error.hpp"
3636
37- using std::vector;
3837using std::string;
3938using std::to_string;
39+ using std::unordered_map;
40+ using std::vector;
4041
4142using bamxx::bgzf_file;
4243
43- void
44+ std::unordered_map<std::string, std:: uint32_t >
4445write_counts_header_from_chrom_sizes (const vector<string> &chrom_names,
4546 const vector<uint64_t > &chrom_sizes,
4647 bgzf_file &out) {
4748 const auto version = " #DNMTOOLS " + string (VERSION) + " \n " ;
4849 out.write (version.c_str ());
50+
51+ std::unordered_map<std::string, std::uint32_t > chrom_order;
52+ std::uint32_t chrom_count = 0 ;
53+
4954 for (auto i = 0u ; i < size (chrom_sizes); ++i) {
5055 const string tmp =
5156 " #" + chrom_names[i] + " " + to_string (chrom_sizes[i]) + " \n " ;
52- out.write (tmp.c_str ());
57+ out.write (tmp.data ());
58+ chrom_order.emplace (chrom_names[i], chrom_count++);
5359 }
5460 out.write (" #\n " );
55- }
5661
62+ return chrom_order;
63+ }
5764
58- void
65+ std::unordered_map<std::string, std:: uint32_t >
5966write_counts_header_from_file (const string &header_file, bgzf_file &out) {
6067 std::ifstream in (header_file);
6168 if (!in.is_open ()) {
62- throw dnmt_error (" failed to open header file: " + header_file);
69+ throw dnmt_error (" failed to open header file: " + header_file);
6370 }
71+
72+ std::unordered_map<std::string, std::uint32_t > chrom_order;
73+ std::uint32_t chrom_count = 0 ;
74+
6475 string line;
65- while (getline (in, line)) {
76+ while (getline (in, line)) {
6677 out.write (line + ' \n ' );
78+ const auto name = line.substr (1 , line.find (' ' ) - 1 );
79+ chrom_order.emplace (name, chrom_count++);
6780 }
6881 in.close ();
69- }
7082
83+ return chrom_order;
84+ }
7185
7286bamxx::bgzf_file &
7387skip_counts_header (bamxx::bgzf_file &in) {
7488
7589 // use the kstring_t type to more directly use the BGZF file
7690 kstring_t line{0 , 0 , nullptr };
7791 const int ret = ks_resize (&line, 1024 );
78- if (ret) return in;
92+ if (ret)
93+ return in;
7994
8095 while (bamxx::getline (in, line) && line.s [0 ] == ' #' ) {
8196 if (line.s [0 ] == ' #' && line.l == 1 )
@@ -86,7 +101,6 @@ skip_counts_header(bamxx::bgzf_file &in) {
86101 return in;
87102}
88103
89-
90104int
91105get_chrom_sizes_for_counts_header (const uint32_t n_threads,
92106 const string &filename,
@@ -96,7 +110,8 @@ get_chrom_sizes_for_counts_header(const uint32_t n_threads,
96110 bamxx::bam_tpool tpool (n_threads);
97111
98112 bgzf_file in (filename, " r" );
99- if (!in) return -1 ;
113+ if (!in)
114+ return -1 ;
100115 if (n_threads > 1 && in.is_bgzf ())
101116 tpool.set_io (in);
102117
@@ -106,18 +121,22 @@ get_chrom_sizes_for_counts_header(const uint32_t n_threads,
106121 // use the kstring_t type to more directly use the BGZF file
107122 kstring_t line{0 , 0 , nullptr };
108123 const int ret = ks_resize (&line, 1024 );
109- if (ret) return -1 ;
124+ if (ret)
125+ return -1 ;
110126
111127 uint64_t chrom_size = 0 ;
112128 while (getline (in, line)) {
113129 if (line.s [0 ] == ' >' ) {
114- if (!chrom_names.empty ()) chrom_sizes.push_back (chrom_size);
130+ if (!chrom_names.empty ())
131+ chrom_sizes.push_back (chrom_size);
115132 chrom_names.emplace_back (line.s + 1 );
116133 chrom_size = 0 ;
117134 }
118- else chrom_size += line.l ;
135+ else
136+ chrom_size += line.l ;
119137 }
120- if (!chrom_names.empty ()) chrom_sizes.push_back (chrom_size);
138+ if (!chrom_names.empty ())
139+ chrom_sizes.push_back (chrom_size);
121140
122141 ks_free (&line);
123142
@@ -126,7 +145,6 @@ get_chrom_sizes_for_counts_header(const uint32_t n_threads,
126145 return 0 ;
127146}
128147
129-
130148void
131149write_counts_header_from_bam_header (const bamxx::bam_header &hdr,
132150 bgzf_file &out) {
@@ -142,7 +160,6 @@ write_counts_header_from_bam_header(const bamxx::bam_header &hdr,
142160 out.write (" #\n " );
143161}
144162
145-
146163bool
147164write_counts_header_line (string line, bgzf_file &out) {
148165 line += ' \n ' ;
@@ -153,8 +170,10 @@ write_counts_header_line(string line, bgzf_file &out) {
153170bool
154171get_has_counts_header (const string &filename) {
155172 bgzf_file in (filename, " r" );
156- if (!in) return false ;
173+ if (!in)
174+ return false ;
157175 string line;
158- if (!getline (in, line)) return false ;
176+ if (!getline (in, line))
177+ return false ;
159178 return line[0 ] == ' #' ;
160179}
0 commit comments