Skip to content

Commit dc0198e

Browse files
Merge branch 'zlib-to-bgzf' of github.com:smithlabcode/dnmtools into zlib-to-bgzf
2 parents c76ee2b + ec1edeb commit dc0198e

File tree

9 files changed

+322
-28
lines changed

9 files changed

+322
-28
lines changed

data/md5sum.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ b5270bb38d798741cfa74f411a0d49bf tests/tRex1_promoters.roi.bed
1818
34f5eddd80d5d35131f92697d4cca896 tests/reads.counts.sym
1919
5d1ecf8d4c8a62b274f5d07e7de7d01c tests/reads.fmt.srt.uniq.sam
2020
b067a733102e611ca614ae22fc944471 tests/reads.ustats
21+
0048de3fc412cb12ec2e070c8151f86f tests/methylome_ab.diff

data/methylome_a.counts.sym

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
chr1 163 + CpG 0.885371 2495
2+
chr1 206 + CpG 0.900059 3362
3+
chr1 232 + CpG 0.891898 4283
4+
chr1 278 + CpG 0.895936 4872
5+
chr1 296 + CpG 0.904536 5070
6+
chr1 310 + CpG 0.900655 5194
7+
chr1 322 + CpG 0.102944 5333
8+
chr1 324 + CpG 0.0979768 5338
9+
chr1 350 + CpG 0.0992214 5523
10+
chr1 356 + CpG 0.0980427 5569
11+
chr1 358 + CpG 0.0944375 5591
12+
chr1 367 + CpG 0.0983925 5661
13+
chr1 388 + CpG 0.100379 5808
14+
chr1 402 + CpG 0.894009 5859
15+
chr1 404 + CpG 0.898489 5891
16+
chr1 422 + CpG 0.890427 5996
17+
chr1 434 + CpG 0.891272 6061
18+
chr1 442 + CpG 0.890658 6091
19+
chr1 448 + CpG 0.896047 6147
20+
chr1 461 + CpG 0.893198 6189
21+
chr1 467 + CpG 0.895397 6214
22+
chr1 473 + CpG 0.890295 6244
23+
chr1 485 + CpG 0.896256 6304
24+
chr1 488 + CpG 0.897663 6332
25+
chr1 496 + CpG 0.896302 6355
26+
chr1 502 + CpG 0.895712 6367
27+
chr1 514 + CpG 0.896622 6365
28+
chr1 517 + CpG 0.895009 6372
29+
chr1 520 + CpG 0.892313 6361
30+
chr1 522 + CpG 0.894836 6352
31+
chr1 535 + CpG 0.893348 6404
32+
chr1 537 + CpG 0.900701 6415
33+
chr1 540 + CpG 0.898191 6414
34+
chr1 564 + CpG 0.893356 6367
35+
chr1 569 + CpG 0.89719 6371
36+
chr1 572 + CpG 0.89482 6332
37+
chr1 577 + CpG 0.892193 6289
38+
chr1 583 + CpG 0.894065 6268
39+
chr1 585 + CpG 0.894627 6254
40+
chr1 588 + CpG 0.896248 6236
41+
chr1 594 + CpG 0.896346 6213
42+
chr1 602 + CpG 0.893856 6152
43+
chr1 606 + CpG 0.900572 6115
44+
chr1 609 + CpG 0.889762 6105
45+
chr1 612 + CpG 0.90954 6069
46+
chr1 617 + CpG 0.89103 6020
47+
chr1 620 + CpG 0.897577 5985
48+
chr1 631 + CpG 0.896323 5874
49+
chr1 633 + CpG 0.895214 5850
50+
chr1 642 + CpG 0.900296 5737
51+
chr1 650 + CpG 0.902435 5709
52+
chr1 654 + CpG 0.896709 5683
53+
chr1 660 + CpG 0.897639 5676
54+
chr1 665 + CpG 0.886054 5643
55+
chr1 673 + CpG 0.900411 5593
56+
chr1 679 + CpG 0.892864 5535
57+
chr1 681 + CpG 0.895913 5505
58+
chr1 684 + CpG 0.906811 5462
59+
chr1 702 + CpG 0.893238 5339
60+
chr1 705 + CpG 0.893273 5322
61+
chr1 708 + CpG 0.89059 5292
62+
chr1 710 + CpG 0.895027 5268
63+
chr1 713 + CpG 0.896526 5267
64+
chr1 729 + CpG 0.891296 5170
65+
chr1 731 + CpG 0.894326 5129
66+
chr1 737 + CpG 0.101157 5101
67+
chr1 745 + CpG 0.098996 4980
68+
chr1 755 + CpG 0.103188 4768
69+
chr1 757 + CpG 0.0993447 4731
70+
chr1 760 + CpG 0.0984832 4681
71+
chr1 766 + CpG 0.100824 4612
72+
chr1 779 + CpG 0.097355 4499
73+
chr1 785 + CpG 0.104054 4440
74+
chr1 787 + CpG 0.0980481 4406
75+
chr1 792 + CpG 0.104547 4333
76+
chr1 799 + CpG 0.0990355 4251
77+
chr1 801 + CpG 0.0969194 4220
78+
chr1 804 + CpG 0.884496 4199
79+
chr1 816 + CpG 0.89358 4003
80+
chr1 824 + CpG 0.893299 3880
81+
chr1 828 + CpG 0.892152 3848
82+
chr1 831 + CpG 0.890568 3838
83+
chr1 834 + CpG 0.891522 3798
84+
chr1 839 + CpG 0.897553 3719
85+
chr1 845 + CpG 0.899183 3670
86+
chr1 853 + CpG 0.898612 3531
87+
chr1 857 + CpG 0.900296 3380
88+
chr1 860 + CpG 0.896175 3294
89+
chr1 863 + CpG 0.892756 3189
90+
chr1 868 + CpG 0.891703 3001
91+
chr1 874 + CpG 0.886834 2757
92+
chr1 882 + CpG 0.907975 2445
93+
chr1 886 + CpG 0.880694 2305
94+
chr1 889 + CpG 0.882969 2196
95+
chr1 892 + CpG 0.896952 2067
96+
chr1 894 + CpG 0.889332 2006
97+
chr1 897 + CpG 0.886603 1896
98+
chr1 903 + CpG 0.896429 1680
99+
chr1 911 + CpG 0.881223 1406
100+
chr1 915 + CpG 0.868526 1255
101+
chr1 918 + CpG 0.887457 1164
102+
chr1 921 + CpG 0.887417 1057
103+
chr1 923 + CpG 0.872802 967
104+
chr1 926 + CpG 0.875887 846
105+
chr1 932 + CpG 0.88853 619
106+
chr1 940 + CpG 0.865714 350
107+
chr1 944 + CpG 0.884058 207
108+
chr1 947 + CpG 0.708738 103

data/methylome_b.counts.sym

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
chr1 163 + CpG 0.896375 2538
2+
chr1 206 + CpG 0.897481 3414
3+
chr1 232 + CpG 0.888131 4398
4+
chr1 278 + CpG 0.894575 4866
5+
chr1 296 + CpG 0.892725 5127
6+
chr1 310 + CpG 0.893081 5275
7+
chr1 322 + CpG 0.899683 5363
8+
chr1 324 + CpG 0.892228 5391
9+
chr1 350 + CpG 0.898259 5573
10+
chr1 356 + CpG 0.884881 5655
11+
chr1 358 + CpG 0.899364 5664
12+
chr1 367 + CpG 0.887803 5731
13+
chr1 388 + CpG 0.8979 5906
14+
chr1 402 + CpG 0.887257 6058
15+
chr1 404 + CpG 0.889255 6077
16+
chr1 422 + CpG 0.892903 6200
17+
chr1 434 + CpG 0.898734 6320
18+
chr1 442 + CpG 0.896819 6319
19+
chr1 448 + CpG 0.89785 6373
20+
chr1 461 + CpG 0.105519 6378
21+
chr1 467 + CpG 0.0939797 6395
22+
chr1 473 + CpG 0.0959203 6422
23+
chr1 485 + CpG 0.0894118 6375
24+
chr1 488 + CpG 0.101708 6381
25+
chr1 496 + CpG 0.0996085 6385
26+
chr1 502 + CpG 0.0979121 6322
27+
chr1 514 + CpG 0.100904 6303
28+
chr1 517 + CpG 0.0972134 6316
29+
chr1 520 + CpG 0.0994906 6282
30+
chr1 522 + CpG 0.0937997 6290
31+
chr1 535 + CpG 0.100511 6258
32+
chr1 537 + CpG 0.090749 6248
33+
chr1 540 + CpG 0.103028 6241
34+
chr1 564 + CpG 0.0994068 6237
35+
chr1 569 + CpG 0.100256 6244
36+
chr1 572 + CpG 0.0916251 6221
37+
chr1 577 + CpG 0.0930195 6203
38+
chr1 583 + CpG 0.101648 6188
39+
chr1 585 + CpG 0.0988014 6174
40+
chr1 588 + CpG 0.888853 6154
41+
chr1 594 + CpG 0.895356 6116
42+
chr1 602 + CpG 0.892945 6109
43+
chr1 606 + CpG 0.894495 6085
44+
chr1 609 + CpG 0.899095 6075
45+
chr1 612 + CpG 0.906008 6075
46+
chr1 617 + CpG 0.890079 6068
47+
chr1 620 + CpG 0.893045 6068
48+
chr1 631 + CpG 0.896924 6015
49+
chr1 633 + CpG 0.906344 5990
50+
chr1 642 + CpG 0.891856 5992
51+
chr1 650 + CpG 0.895422 5919
52+
chr1 654 + CpG 0.892596 5875
53+
chr1 660 + CpG 0.901209 5790
54+
chr1 665 + CpG 0.892863 5731
55+
chr1 673 + CpG 0.910198 5668
56+
chr1 679 + CpG 0.897883 5621
57+
chr1 681 + CpG 0.889067 5607
58+
chr1 684 + CpG 0.89372 5589
59+
chr1 702 + CpG 0.895077 5423
60+
chr1 705 + CpG 0.892007 5380
61+
chr1 708 + CpG 0.895394 5363
62+
chr1 710 + CpG 0.894124 5327
63+
chr1 713 + CpG 0.891525 5310
64+
chr1 729 + CpG 0.892229 5057
65+
chr1 731 + CpG 0.901375 5019
66+
chr1 737 + CpG 0.889204 4937
67+
chr1 745 + CpG 0.892381 4804
68+
chr1 755 + CpG 0.898156 4664
69+
chr1 757 + CpG 0.888985 4648
70+
chr1 760 + CpG 0.893792 4623
71+
chr1 766 + CpG 0.900198 4539
72+
chr1 779 + CpG 0.897518 4352
73+
chr1 785 + CpG 0.89578 4289
74+
chr1 787 + CpG 0.895231 4257
75+
chr1 792 + CpG 0.897337 4169
76+
chr1 799 + CpG 0.898918 4066
77+
chr1 801 + CpG 0.899803 4052
78+
chr1 804 + CpG 0.897532 4011
79+
chr1 816 + CpG 0.895012 3829
80+
chr1 824 + CpG 0.903985 3739
81+
chr1 828 + CpG 0.898031 3707
82+
chr1 831 + CpG 0.892002 3676
83+
chr1 834 + CpG 0.905847 3643
84+
chr1 839 + CpG 0.889659 3607
85+
chr1 845 + CpG 0.893179 3548
86+
chr1 853 + CpG 0.889873 3387
87+
chr1 857 + CpG 0.887658 3249
88+
chr1 860 + CpG 0.89147 3142
89+
chr1 863 + CpG 0.889328 3036
90+
chr1 868 + CpG 0.89277 2863
91+
chr1 874 + CpG 0.892424 2640
92+
chr1 882 + CpG 0.899573 2340
93+
chr1 886 + CpG 0.890511 2192
94+
chr1 889 + CpG 0.892601 2095
95+
chr1 892 + CpG 0.898949 1999
96+
chr1 894 + CpG 0.891316 1923
97+
chr1 897 + CpG 0.873894 1808
98+
chr1 903 + CpG 0.893949 1603
99+
chr1 911 + CpG 0.883738 1359
100+
chr1 915 + CpG 0.870968 1209
101+
chr1 918 + CpG 0.889488 1113
102+
chr1 921 + CpG 0.892644 1006
103+
chr1 923 + CpG 0.884289 942
104+
chr1 926 + CpG 0.890361 830
105+
chr1 932 + CpG 0.893142 627
106+
chr1 940 + CpG 0.830357 336
107+
chr1 944 + CpG 0.0909091 187
108+
chr1 947 + CpG 0.120482 83

src/analysis/bsrate.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -332,12 +332,10 @@ main_bsrate(int argc, const char **argv) {
332332
const int32_t the_tid = get_tid(aln);
333333

334334
// make sure all reads from same chrom are contiguous in the file
335-
if (chroms_seen.find(the_tid) != end(chroms_seen)) {
336-
cerr << the_tid << '\t' << current_tid << endl;
335+
if (chroms_seen.find(the_tid) != end(chroms_seen))
337336
throw runtime_error("chroms out of order in mapped reads file");
338-
}
337+
339338
current_tid = the_tid;
340-
if (VERBOSE) cerr << "processing " << the_tid << endl;
341339

342340
chroms_seen.insert(the_tid);
343341

@@ -346,6 +344,9 @@ main_bsrate(int argc, const char **argv) {
346344
throw runtime_error("could not find chrom: " + the_tid);
347345

348346
chrom_idx = chrom_itr->second;
347+
348+
if (VERBOSE) cerr << "processing " << names[chrom_idx] << endl;
349+
349350
use_this_chrom = seq_to_use.empty() || chrom_idx == chrom_idx_to_use;
350351
}
351352

src/analysis/roimethstat.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <numeric>
2525
#include <utility>
2626
#include <stdexcept>
27+
#include <regex>
2728

2829
#include <bamxx.hpp>
2930

@@ -48,6 +49,7 @@ using std::runtime_error;
4849
using std::ifstream;
4950
using std::isfinite;
5051
using std::is_sorted;
52+
using std::regex_match;
5153

5254
using bgzf_file = bamxx::bam_bgzf;
5355

@@ -476,6 +478,16 @@ Columns (beyond the first 6) in the BED format output:
476478
// bed format
477479
if (n_columns != 3 && n_columns < 6)
478480
throw runtime_error("format must be 3 or 6+ column bed: " + regions_file);
481+
if (is_msite_file(regions_file)) {
482+
cerr << opt_parse.help_message() << endl;
483+
throw runtime_error("The file seems to be a methylation file: " +
484+
regions_file + "\nCheck the order of the input arguments");
485+
}
486+
if (!is_msite_file(cpgs_file)) {
487+
cerr << opt_parse.help_message() << endl;
488+
throw runtime_error("The file is not a methylation file: " + cpgs_file);
489+
}
490+
479491

480492
vector<GenomicRegion> regions;
481493
ReadBEDFile(regions_file, regions);
@@ -521,3 +533,6 @@ Columns (beyond the first 6) in the BED format output:
521533
}
522534
return EXIT_SUCCESS;
523535
}
536+
537+
538+

src/common/MSite.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020
#include <fstream>
2121
#include <sstream>
2222
#include <stdexcept>
23+
#include <regex>
2324

2425
#include "smithlab_utils.hpp"
2526

2627
using std::string;
2728
using std::runtime_error;
29+
using std::regex_match;
2830

2931
MSite::MSite(const string &line) {
3032
/* GS: this is faster but seems to be genenerating issues when
@@ -152,3 +154,45 @@ find_offset_for_msite(const std::string &chr,
152154
move_to_start_of_line(site_in);
153155
}
154156
}
157+
158+
159+
160+
bool
161+
is_msite_file(const string &file) {
162+
ifstream in(file);
163+
if (!in)
164+
throw runtime_error("cannot open file: " + file);
165+
166+
string line;
167+
if(!getline(in, line)) return false;
168+
169+
std::istringstream iss(line);
170+
171+
string chrom;
172+
if (!(iss >> chrom)) return false;
173+
174+
long int pos = 0;
175+
if (!(iss >> pos)) return false;
176+
177+
string strand;
178+
if (!(iss >> strand) ||
179+
(strand.size() != 1) ||
180+
((strand != "+") && (strand != "-")) )
181+
return false;
182+
183+
string context;
184+
std::regex pattern("^C[pHWX][GH]$");
185+
if (!(iss >> context) || !regex_match(context, pattern)) return false;
186+
187+
double level = 0.0;
188+
if (!(iss >> level) || level < 0 || level > 1) return false;
189+
190+
long int n_reads = 0;
191+
if (!(iss >> n_reads)) return false;
192+
193+
string temp;
194+
if (iss >> temp) return false;
195+
else return true;
196+
197+
}
198+

src/common/MSite.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,4 +169,7 @@ read_site(bamxx::bam_bgzf &f, MSite &s) {
169169
return f;
170170
}
171171

172+
bool
173+
is_msite_file(const std::string &file);
174+
172175
#endif

0 commit comments

Comments
 (0)