@@ -793,6 +793,10 @@ load_array_data(const size_t bin_size,
793793
794794 MSite site;
795795 while (read_site (in, site)) {
796+ // TODO: MN: I think that the block below should be placed later
797+ // in this scope. At this location, the methylation level of the
798+ // first site in a new chrom is contributed to the last bin of the
799+ // previous chrom.
796800 if (site.n_reads > 0 ) { // its covered by a probe
797801 ++num_probes_in_bin;
798802 if (site.meth < meth_min)
@@ -880,19 +884,26 @@ load_wgbs_data(const size_t bin_size, const string &cpgs_file,
880884 std::unordered_set<string> chroms_seen;
881885
882886 MSite site;
887+ size_t prev_pos = 0ul ;
888+ size_t sites_in_bin = 0ul ;
889+
883890 while (read_site (in, site)) {
884891 if (curr_chrom != site.chrom ) { // handle change of chrom
892+ if (sites_in_bin > 0 ) bins.back ().set_end (prev_pos);
885893 if (chroms_seen.find (site.chrom ) != end (chroms_seen))
886894 throw runtime_error (" sites not sorted" );
887895 chroms_seen.insert (site.chrom );
888896 curr_chrom = site.chrom ;
889897 reads.push_back (0 );
890898 meth.push_back (make_pair (0.0 , 0.0 ));
891899 bins.push_back (SimpleGenomicRegion (site.chrom , 0 , bin_size));
900+ sites_in_bin = 0 ;
892901 }
902+ prev_pos = site.pos ;
893903 if (site.pos < bins.back ().get_start ())
894904 throw runtime_error (" sites not sorted" );
895905 while (bins.back ().get_end () < site.pos ) {
906+ sites_in_bin = 0 ;
896907 reads.push_back (0 );
897908 meth.push_back (make_pair (0.0 , 0.0 ));
898909 bins.push_back (SimpleGenomicRegion (site.chrom , bins.back ().get_end (),
@@ -901,7 +912,9 @@ load_wgbs_data(const size_t bin_size, const string &cpgs_file,
901912 reads.back () += site.n_reads ;
902913 meth.back ().first += site.n_meth ();
903914 meth.back ().second += site.n_unmeth ();
915+ sites_in_bin++;
904916 }
917+ if (sites_in_bin > 0 ) bins.back ().set_end (prev_pos);
905918}
906919
907920
0 commit comments