Skip to content

Commit 061bee6

Browse files
Merge pull request #96 from smithlabcode/zlib-to-bgzf
Zlib to bgzf
2 parents 0e4b9e3 + 24e5afa commit 061bee6

19 files changed

+432
-159
lines changed

Makefile.am

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,8 @@ EXTRA_DIST = \
8080
test_scripts/test_radmeth.test \
8181
test_scripts/test_states.test \
8282
test_scripts/test_amrfinder.test \
83-
test_scripts/test_hypermr.test
83+
test_scripts/test_hypermr.test \
84+
test_scripts/test_diff.test
8485

8586
### ADS: the file md5sum.txt can be regenerated by running the tests
8687
### with "make check" and then doing `md5sum tests/* > md5sum.txt`
@@ -109,7 +110,8 @@ TESTS = test_scripts/test_simreads.test \
109110
test_scripts/test_radmeth.test \
110111
test_scripts/test_states.test \
111112
test_scripts/test_amrfinder.test \
112-
test_scripts/test_hypermr.test
113+
test_scripts/test_hypermr.test \
114+
test_scripts/test_diff.test
113115

114116
TEST_EXTENSIONS = .test
115117

@@ -234,4 +236,5 @@ CLEANFILES = \
234236
tests/radmeth_test_output.txt \
235237
tests/reads.epiread \
236238
tests/two_epialleles.amr \
237-
tests/araTha1_simulated.hypermr
239+
tests/araTha1_simulated.hypermr \
240+
tests/methylome_ab.diff

configure.ac

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,8 @@ ADS_OPENMP([], [AC_MSG_FAILURE([OpenMP must be installed to build dnmtools])])
3838

3939
dnl recursively configure abismal and smithlab_cpp
4040
AC_CONFIG_SUBDIRS([src/abismal])
41-
AX_SUBDIRS_CONFIGURE([src/smithlab_cpp], [--enable-hts])
41+
AC_CONFIG_SUBDIRS([src/smithlab_cpp])
42+
dnl AX_SUBDIRS_CONFIGURE([src/smithlab_cpp], [--enable-hts])
4243

4344
dnl check for HTSLib if requested
4445
hts_fail_msg="
@@ -53,15 +54,8 @@ system. Please use the LDFLAGS and CPPFLAGS variables to specify the
5354
directories where the GSL library and headers can be found.
5455
"
5556

56-
zlib_fail_msg="
57-
Failed to locate the ZLib on your system. Please use the LDFLAGS and
58-
CPPFLAGS variables to specify the directories where the ZLib library
59-
and headers can be found.
60-
"
61-
6257
dnl check for required libraries
6358
AC_SEARCH_LIBS([hts_version], [hts], [], [AC_MSG_FAILURE([$hts_fail_msg])])
64-
AC_SEARCH_LIBS([zlibVersion], [z], [], [AC_MSG_FAILURE([$zlib_fail_msg])])
6559
AC_SEARCH_LIBS([cblas_dgemm], [gslcblas], [], [AC_MSG_FAILURE([$gsl_fail_msg])])
6660
AC_SEARCH_LIBS([gsl_blas_dgemm], [gsl], [], [AC_MSG_FAILURE([$gsl_fail_msg])])
6761

@@ -80,6 +74,8 @@ tests/radmeth_test_table.txt:data/radmeth_test_table.txt
8074
tests/radmeth_test_design.txt:data/radmeth_test_design.txt
8175
tests/two_epialleles.states:data/two_epialleles.states
8276
tests/araTha1_simulated.counts.gz:data/araTha1_simulated.counts.gz
77+
tests/methylome_a.counts.sym:data/methylome_a.counts.sym
78+
tests/methylome_b.counts.sym:data/methylome_b.counts.sym
8379
])
8480

8581
AC_OUTPUT

data/md5sum.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,3 +18,4 @@ b5270bb38d798741cfa74f411a0d49bf tests/tRex1_promoters.roi.bed
1818
34f5eddd80d5d35131f92697d4cca896 tests/reads.counts.sym
1919
5d1ecf8d4c8a62b274f5d07e7de7d01c tests/reads.fmt.srt.uniq.sam
2020
b067a733102e611ca614ae22fc944471 tests/reads.ustats
21+
0048de3fc412cb12ec2e070c8151f86f tests/methylome_ab.diff

data/methylome_a.counts.sym

Lines changed: 108 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,108 @@
1+
chr1 163 + CpG 0.885371 2495
2+
chr1 206 + CpG 0.900059 3362
3+
chr1 232 + CpG 0.891898 4283
4+
chr1 278 + CpG 0.895936 4872
5+
chr1 296 + CpG 0.904536 5070
6+
chr1 310 + CpG 0.900655 5194
7+
chr1 322 + CpG 0.102944 5333
8+
chr1 324 + CpG 0.0979768 5338
9+
chr1 350 + CpG 0.0992214 5523
10+
chr1 356 + CpG 0.0980427 5569
11+
chr1 358 + CpG 0.0944375 5591
12+
chr1 367 + CpG 0.0983925 5661
13+
chr1 388 + CpG 0.100379 5808
14+
chr1 402 + CpG 0.894009 5859
15+
chr1 404 + CpG 0.898489 5891
16+
chr1 422 + CpG 0.890427 5996
17+
chr1 434 + CpG 0.891272 6061
18+
chr1 442 + CpG 0.890658 6091
19+
chr1 448 + CpG 0.896047 6147
20+
chr1 461 + CpG 0.893198 6189
21+
chr1 467 + CpG 0.895397 6214
22+
chr1 473 + CpG 0.890295 6244
23+
chr1 485 + CpG 0.896256 6304
24+
chr1 488 + CpG 0.897663 6332
25+
chr1 496 + CpG 0.896302 6355
26+
chr1 502 + CpG 0.895712 6367
27+
chr1 514 + CpG 0.896622 6365
28+
chr1 517 + CpG 0.895009 6372
29+
chr1 520 + CpG 0.892313 6361
30+
chr1 522 + CpG 0.894836 6352
31+
chr1 535 + CpG 0.893348 6404
32+
chr1 537 + CpG 0.900701 6415
33+
chr1 540 + CpG 0.898191 6414
34+
chr1 564 + CpG 0.893356 6367
35+
chr1 569 + CpG 0.89719 6371
36+
chr1 572 + CpG 0.89482 6332
37+
chr1 577 + CpG 0.892193 6289
38+
chr1 583 + CpG 0.894065 6268
39+
chr1 585 + CpG 0.894627 6254
40+
chr1 588 + CpG 0.896248 6236
41+
chr1 594 + CpG 0.896346 6213
42+
chr1 602 + CpG 0.893856 6152
43+
chr1 606 + CpG 0.900572 6115
44+
chr1 609 + CpG 0.889762 6105
45+
chr1 612 + CpG 0.90954 6069
46+
chr1 617 + CpG 0.89103 6020
47+
chr1 620 + CpG 0.897577 5985
48+
chr1 631 + CpG 0.896323 5874
49+
chr1 633 + CpG 0.895214 5850
50+
chr1 642 + CpG 0.900296 5737
51+
chr1 650 + CpG 0.902435 5709
52+
chr1 654 + CpG 0.896709 5683
53+
chr1 660 + CpG 0.897639 5676
54+
chr1 665 + CpG 0.886054 5643
55+
chr1 673 + CpG 0.900411 5593
56+
chr1 679 + CpG 0.892864 5535
57+
chr1 681 + CpG 0.895913 5505
58+
chr1 684 + CpG 0.906811 5462
59+
chr1 702 + CpG 0.893238 5339
60+
chr1 705 + CpG 0.893273 5322
61+
chr1 708 + CpG 0.89059 5292
62+
chr1 710 + CpG 0.895027 5268
63+
chr1 713 + CpG 0.896526 5267
64+
chr1 729 + CpG 0.891296 5170
65+
chr1 731 + CpG 0.894326 5129
66+
chr1 737 + CpG 0.101157 5101
67+
chr1 745 + CpG 0.098996 4980
68+
chr1 755 + CpG 0.103188 4768
69+
chr1 757 + CpG 0.0993447 4731
70+
chr1 760 + CpG 0.0984832 4681
71+
chr1 766 + CpG 0.100824 4612
72+
chr1 779 + CpG 0.097355 4499
73+
chr1 785 + CpG 0.104054 4440
74+
chr1 787 + CpG 0.0980481 4406
75+
chr1 792 + CpG 0.104547 4333
76+
chr1 799 + CpG 0.0990355 4251
77+
chr1 801 + CpG 0.0969194 4220
78+
chr1 804 + CpG 0.884496 4199
79+
chr1 816 + CpG 0.89358 4003
80+
chr1 824 + CpG 0.893299 3880
81+
chr1 828 + CpG 0.892152 3848
82+
chr1 831 + CpG 0.890568 3838
83+
chr1 834 + CpG 0.891522 3798
84+
chr1 839 + CpG 0.897553 3719
85+
chr1 845 + CpG 0.899183 3670
86+
chr1 853 + CpG 0.898612 3531
87+
chr1 857 + CpG 0.900296 3380
88+
chr1 860 + CpG 0.896175 3294
89+
chr1 863 + CpG 0.892756 3189
90+
chr1 868 + CpG 0.891703 3001
91+
chr1 874 + CpG 0.886834 2757
92+
chr1 882 + CpG 0.907975 2445
93+
chr1 886 + CpG 0.880694 2305
94+
chr1 889 + CpG 0.882969 2196
95+
chr1 892 + CpG 0.896952 2067
96+
chr1 894 + CpG 0.889332 2006
97+
chr1 897 + CpG 0.886603 1896
98+
chr1 903 + CpG 0.896429 1680
99+
chr1 911 + CpG 0.881223 1406
100+
chr1 915 + CpG 0.868526 1255
101+
chr1 918 + CpG 0.887457 1164
102+
chr1 921 + CpG 0.887417 1057
103+
chr1 923 + CpG 0.872802 967
104+
chr1 926 + CpG 0.875887 846
105+
chr1 932 + CpG 0.88853 619
106+
chr1 940 + CpG 0.865714 350
107+
chr1 944 + CpG 0.884058 207
108+
chr1 947 + CpG 0.708738 103

data/methylome_b.counts.sym

Lines changed: 108 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,108 @@
1+
chr1 163 + CpG 0.896375 2538
2+
chr1 206 + CpG 0.897481 3414
3+
chr1 232 + CpG 0.888131 4398
4+
chr1 278 + CpG 0.894575 4866
5+
chr1 296 + CpG 0.892725 5127
6+
chr1 310 + CpG 0.893081 5275
7+
chr1 322 + CpG 0.899683 5363
8+
chr1 324 + CpG 0.892228 5391
9+
chr1 350 + CpG 0.898259 5573
10+
chr1 356 + CpG 0.884881 5655
11+
chr1 358 + CpG 0.899364 5664
12+
chr1 367 + CpG 0.887803 5731
13+
chr1 388 + CpG 0.8979 5906
14+
chr1 402 + CpG 0.887257 6058
15+
chr1 404 + CpG 0.889255 6077
16+
chr1 422 + CpG 0.892903 6200
17+
chr1 434 + CpG 0.898734 6320
18+
chr1 442 + CpG 0.896819 6319
19+
chr1 448 + CpG 0.89785 6373
20+
chr1 461 + CpG 0.105519 6378
21+
chr1 467 + CpG 0.0939797 6395
22+
chr1 473 + CpG 0.0959203 6422
23+
chr1 485 + CpG 0.0894118 6375
24+
chr1 488 + CpG 0.101708 6381
25+
chr1 496 + CpG 0.0996085 6385
26+
chr1 502 + CpG 0.0979121 6322
27+
chr1 514 + CpG 0.100904 6303
28+
chr1 517 + CpG 0.0972134 6316
29+
chr1 520 + CpG 0.0994906 6282
30+
chr1 522 + CpG 0.0937997 6290
31+
chr1 535 + CpG 0.100511 6258
32+
chr1 537 + CpG 0.090749 6248
33+
chr1 540 + CpG 0.103028 6241
34+
chr1 564 + CpG 0.0994068 6237
35+
chr1 569 + CpG 0.100256 6244
36+
chr1 572 + CpG 0.0916251 6221
37+
chr1 577 + CpG 0.0930195 6203
38+
chr1 583 + CpG 0.101648 6188
39+
chr1 585 + CpG 0.0988014 6174
40+
chr1 588 + CpG 0.888853 6154
41+
chr1 594 + CpG 0.895356 6116
42+
chr1 602 + CpG 0.892945 6109
43+
chr1 606 + CpG 0.894495 6085
44+
chr1 609 + CpG 0.899095 6075
45+
chr1 612 + CpG 0.906008 6075
46+
chr1 617 + CpG 0.890079 6068
47+
chr1 620 + CpG 0.893045 6068
48+
chr1 631 + CpG 0.896924 6015
49+
chr1 633 + CpG 0.906344 5990
50+
chr1 642 + CpG 0.891856 5992
51+
chr1 650 + CpG 0.895422 5919
52+
chr1 654 + CpG 0.892596 5875
53+
chr1 660 + CpG 0.901209 5790
54+
chr1 665 + CpG 0.892863 5731
55+
chr1 673 + CpG 0.910198 5668
56+
chr1 679 + CpG 0.897883 5621
57+
chr1 681 + CpG 0.889067 5607
58+
chr1 684 + CpG 0.89372 5589
59+
chr1 702 + CpG 0.895077 5423
60+
chr1 705 + CpG 0.892007 5380
61+
chr1 708 + CpG 0.895394 5363
62+
chr1 710 + CpG 0.894124 5327
63+
chr1 713 + CpG 0.891525 5310
64+
chr1 729 + CpG 0.892229 5057
65+
chr1 731 + CpG 0.901375 5019
66+
chr1 737 + CpG 0.889204 4937
67+
chr1 745 + CpG 0.892381 4804
68+
chr1 755 + CpG 0.898156 4664
69+
chr1 757 + CpG 0.888985 4648
70+
chr1 760 + CpG 0.893792 4623
71+
chr1 766 + CpG 0.900198 4539
72+
chr1 779 + CpG 0.897518 4352
73+
chr1 785 + CpG 0.89578 4289
74+
chr1 787 + CpG 0.895231 4257
75+
chr1 792 + CpG 0.897337 4169
76+
chr1 799 + CpG 0.898918 4066
77+
chr1 801 + CpG 0.899803 4052
78+
chr1 804 + CpG 0.897532 4011
79+
chr1 816 + CpG 0.895012 3829
80+
chr1 824 + CpG 0.903985 3739
81+
chr1 828 + CpG 0.898031 3707
82+
chr1 831 + CpG 0.892002 3676
83+
chr1 834 + CpG 0.905847 3643
84+
chr1 839 + CpG 0.889659 3607
85+
chr1 845 + CpG 0.893179 3548
86+
chr1 853 + CpG 0.889873 3387
87+
chr1 857 + CpG 0.887658 3249
88+
chr1 860 + CpG 0.89147 3142
89+
chr1 863 + CpG 0.889328 3036
90+
chr1 868 + CpG 0.89277 2863
91+
chr1 874 + CpG 0.892424 2640
92+
chr1 882 + CpG 0.899573 2340
93+
chr1 886 + CpG 0.890511 2192
94+
chr1 889 + CpG 0.892601 2095
95+
chr1 892 + CpG 0.898949 1999
96+
chr1 894 + CpG 0.891316 1923
97+
chr1 897 + CpG 0.873894 1808
98+
chr1 903 + CpG 0.893949 1603
99+
chr1 911 + CpG 0.883738 1359
100+
chr1 915 + CpG 0.870968 1209
101+
chr1 918 + CpG 0.889488 1113
102+
chr1 921 + CpG 0.892644 1006
103+
chr1 923 + CpG 0.884289 942
104+
chr1 926 + CpG 0.890361 830
105+
chr1 932 + CpG 0.893142 627
106+
chr1 940 + CpG 0.830357 336
107+
chr1 944 + CpG 0.0909091 187
108+
chr1 947 + CpG 0.120482 83

src/analysis/hmr-rep.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,11 +22,12 @@
2222
#include <cstdint> // for [u]int[0-9]+_t
2323
#include <random>
2424

25+
#include <bamxx.hpp>
26+
2527
#include "smithlab_utils.hpp"
2628
#include "smithlab_os.hpp"
2729
#include "GenomicRegion.hpp"
2830
#include "OptionParser.hpp"
29-
#include "zlib_wrapper.hpp"
3031

3132
#include "TwoStateHMM.hpp"
3233
#include "MSite.hpp"
@@ -46,6 +47,8 @@ using std::to_string;
4647
using std::begin;
4748
using std::end;
4849

50+
using bgzf_file = bamxx::bam_bgzf;
51+
4952
static GenomicRegion
5053
as_gen_rgn(const MSite &s) {
5154
return GenomicRegion(s.chrom, s.pos, s.pos + 1);
@@ -297,12 +300,11 @@ load_cpgs(const string &cpgs_file, vector<MSite> &cpgs,
297300
vector<pair<double, double> > &meth,
298301
vector<uint32_t> &reads) {
299302

300-
igzfstream in(cpgs_file);
301-
if (!in)
302-
throw runtime_error("failed opening file: " + cpgs_file);
303+
bgzf_file in(cpgs_file, "r");
304+
if (!in) throw runtime_error("failed opening file: " + cpgs_file);
303305

304306
MSite the_site;
305-
while (in >> the_site) {
307+
while (read_site(in, the_site)) {
306308
cpgs.push_back(the_site);
307309
reads.push_back(the_site.n_reads);
308310
meth.push_back(make_pair(the_site.n_meth(), the_site.n_unmeth()));

src/analysis/hmr.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,11 +24,12 @@
2424
#include <cstdint> // for [u]int[0-9]+_t
2525
#include <random>
2626

27+
#include <bamxx.hpp>
28+
2729
#include "smithlab_utils.hpp"
2830
#include "smithlab_os.hpp"
2931
#include "GenomicRegion.hpp"
3032
#include "OptionParser.hpp"
31-
#include "zlib_wrapper.hpp"
3233

3334
#include "TwoStateHMM.hpp"
3435
#include "MSite.hpp"
@@ -47,6 +48,8 @@ using std::runtime_error;
4748
using std::to_string;
4849
using std::unordered_set;
4950

51+
using bgzf_file = bamxx::bam_bgzf;
52+
5053
static GenomicRegion
5154
as_gen_rgn(const MSite &s) {
5255
return GenomicRegion(s.chrom, s.pos, s.pos + 1);
@@ -308,12 +311,11 @@ load_cpgs(const string &cpgs_file, vector<MSite> &cpgs,
308311
vector<pair<double, double> > &meth,
309312
vector<uint32_t> &reads) {
310313

311-
igzfstream in(cpgs_file);
312-
if (!in)
313-
throw runtime_error("failed opening file: " + cpgs_file);
314+
bgzf_file in(cpgs_file, "r");
315+
if (!in) throw runtime_error("failed opening file: " + cpgs_file);
314316

315317
MSite prev_site, the_site;
316-
while (in >> the_site) {
318+
while (read_site(in, the_site)) {
317319
if (!the_site.is_cpg() || distance(prev_site, the_site) < 2)
318320
throw runtime_error("error: input is not symmetric-CpGs: " + cpgs_file);
319321
cpgs.push_back(the_site);

0 commit comments

Comments
 (0)