Skip to content

Commit fbffbb3

Browse files
committed
added support for reading and writing gzipped BEDs and TSVs
1 parent ed88bf8 commit fbffbb3

22 files changed

+269
-308
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Major and user-visible changes
22

3+
## version 1.3.0
4+
* Added ability to read and write gzip-compressed TSV and BED files based on a name
5+
extension of '.gz'
6+
37
## version 1.2.0
48
* Added ability to process multiple BAMs at once. This will use more memory
59
and possibly improve the confidence scoring in some cases

configure

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#! /bin/sh
22
# Guess values for system-dependent variables and create Makefiles.
3-
# Generated by GNU Autoconf 2.72 for intronProspector 1.2.0.
3+
# Generated by GNU Autoconf 2.72 for intronProspector 1.3.0.
44
#
55
#
66
# Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation,
@@ -600,8 +600,8 @@ MAKEFLAGS=
600600
# Identity of this package.
601601
PACKAGE_NAME='intronProspector'
602602
PACKAGE_TARNAME='intronprospector'
603-
PACKAGE_VERSION='1.2.0'
604-
PACKAGE_STRING='intronProspector 1.2.0'
603+
PACKAGE_VERSION='1.3.0'
604+
PACKAGE_STRING='intronProspector 1.3.0'
605605
PACKAGE_BUGREPORT=''
606606
PACKAGE_URL=''
607607

@@ -1267,7 +1267,7 @@ if test "$ac_init_help" = "long"; then
12671267
# Omit some internal or obsolete options to make the list less imposing.
12681268
# This message is too long to be a string in the A/UX 3.1 sh.
12691269
cat <<_ACEOF
1270-
'configure' configures intronProspector 1.2.0 to adapt to many kinds of systems.
1270+
'configure' configures intronProspector 1.3.0 to adapt to many kinds of systems.
12711271
12721272
Usage: $0 [OPTION]... [VAR=VALUE]...
12731273
@@ -1330,7 +1330,7 @@ fi
13301330

13311331
if test -n "$ac_init_help"; then
13321332
case $ac_init_help in
1333-
short | recursive ) echo "Configuration of intronProspector 1.2.0:";;
1333+
short | recursive ) echo "Configuration of intronProspector 1.3.0:";;
13341334
esac
13351335
cat <<\_ACEOF
13361336
@@ -1434,7 +1434,7 @@ fi
14341434
test -n "$ac_init_help" && exit $ac_status
14351435
if $ac_init_version; then
14361436
cat <<\_ACEOF
1437-
intronProspector configure 1.2.0
1437+
intronProspector configure 1.3.0
14381438
generated by GNU Autoconf 2.72
14391439
14401440
Copyright (C) 2023 Free Software Foundation, Inc.
@@ -1634,7 +1634,7 @@ cat >config.log <<_ACEOF
16341634
This file contains any messages produced by compilers while
16351635
running configure, to aid debugging if configure makes a mistake.
16361636
1637-
It was created by intronProspector $as_me 1.2.0, which was
1637+
It was created by intronProspector $as_me 1.3.0, which was
16381638
generated by GNU Autoconf 2.72. Invocation command line was
16391639
16401640
$ $0$ac_configure_args_raw
@@ -5086,7 +5086,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
50865086
# report actual input values of CONFIG_FILES etc. instead of their
50875087
# values after options handling.
50885088
ac_log="
5089-
This file was extended by intronProspector $as_me 1.2.0, which was
5089+
This file was extended by intronProspector $as_me 1.3.0, which was
50905090
generated by GNU Autoconf 2.72. Invocation command line was
50915091
50925092
CONFIG_FILES = $CONFIG_FILES
@@ -5141,7 +5141,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
51415141
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
51425142
ac_cs_config='$ac_cs_config_escaped'
51435143
ac_cs_version="\\
5144-
intronProspector config.status 1.2.0
5144+
intronProspector config.status 1.3.0
51455145
configured by $0, generated by GNU Autoconf 2.72,
51465146
with options \\"\$ac_cs_config\\"
51475147

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
AC_INIT([intronProspector], 1.2.0)
1+
AC_INIT([intronProspector], 1.3.0)
22

33
##
44
# rebuild with:

docs/intronProspector.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ The `intronProspectorMerge` program can be used to convert to from the
2525
`--intron-calls` format to other formats as well as merge the output from
2626
multiple `intronProspector` runs.
2727

28+
Output TSV and BED files will be automatically compressed with `gzip` if their names end in `.gz`.
29+
2830
# OPTIONS
2931

3032
`-h, --help`

docs/intronProspectorMerge.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
Merge output of the `intronProspector` intron calls tab-separated (TSV) files, as created by the `--intron-calls` option.
1212
This program can also be used to convert the output format from a single run.
1313

14+
Compressed intron calls TSV input files are recognized if their names end in `.gz`.
15+
Output TSV and BED files will be automatically compressed with `gzip` if their names end in `.gz`.
16+
1417
# Options
1518

1619
`-h, --help`

man/intronProspector.1

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
.\" Automatically generated by Pandoc 3.6.3
1+
.\" Automatically generated by Pandoc 3.6.2
22
.\"
3-
.TH "intronProspector" "1" "March 15, 2025" "Call intron junctions"
3+
.TH "intronProspector" "1" "April 10, 2025" "Call intron junctions"
44
.SH NAME
55
\f[B]intronProspector\f[R] \[em] Extract putative intron junctions from
66
RNA\-Seq alignments
@@ -34,6 +34,9 @@ It may also improve the confidence scoring in some cases.
3434
The \f[CR]intronProspectorMerge\f[R] program can be used to convert to
3535
from the \f[CR]\-\-intron\-calls\f[R] format to other formats as well as
3636
merge the output from multiple \f[CR]intronProspector\f[R] runs.
37+
.PP
38+
TSV and BED files will be automatically compressed with \f[CR]gzip\f[R]
39+
if they end in \f[CR].gz\f[R].
3740
.SH OPTIONS
3841
\f[CR]\-h, \-\-help\f[R]
3942
.RS

man/intronProspectorMerge.1

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
.\" Automatically generated by Pandoc 3.6.3
1+
.\" Automatically generated by Pandoc 3.6.2
22
.\"
3-
.TH "intronProspectorMerge" "1" "March 15, 2025" "Merge introns junctions"
3+
.TH "intronProspectorMerge" "1" "April 10, 2025" "Merge introns junctions"
44
.SH NAME
55
\f[B]intronProspectorMerge\f[R] \[em] Merge putative introns junctions
66
calls made \f[CR]intronProspector\f[R] and/or convert output formats
@@ -12,6 +12,11 @@ tab\-separated (TSV) files, as created by the
1212
\f[CR]\-\-intron\-calls\f[R] option.
1313
This program can also be used to convert the output format from a single
1414
run.
15+
.PP
16+
Compressed intron calls TSV files are recognized if they end in
17+
\f[CR].gz\f[R].
18+
TSV and BED files will be automatically compressed with \f[CR]gzip\f[R]
19+
if they end in \f[CR].gz\f[R].
1520
.SH Options
1621
\f[CR]\-h, \-\-help\f[R]
1722
.RS

src/AutoGzip.hh

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#ifndef autogzip_hh
2+
#define autogzip_hh
3+
4+
#include <fstream>
5+
#include <memory>
6+
#include <string>
7+
#include "zfstream.hh" // defines gzifstream, gzofstream
8+
9+
/**
 * Test whether a file name selects gzip handling.
 *
 * The check is purely lexical: it returns true when `path` ends with the
 * exact, case-sensitive suffix ".gz" (so ".gz" itself matches, while "gz",
 * "x.GZ" and the empty string do not). The file is never opened or inspected.
 *
 * Declared `inline` rather than `static` because it is defined in a header:
 * this avoids a private copy per translation unit and unused-function
 * warnings in files that include the header without calling it.
 *
 * @param path file name or path to examine
 * @return true if `path` ends in ".gz", false otherwise
 */
inline bool isGzipName(const std::string& path) {
    const std::string::size_type suffixLen = 3;  // length of ".gz"
    if (path.size() < suffixLen) {
        return false;  // too short to carry the suffix; also guards the subtraction below
    }
    return path.compare(path.size() - suffixLen, suffixLen, ".gz") == 0;
}
12+
13+
class AutoGzipInput : public std::istream {
14+
public:
15+
explicit AutoGzipInput(const std::string& filename)
16+
: std::istream(nullptr), filename_(filename), is_gz_(isGzipName(filename)) {
17+
if (is_gz_) {
18+
gz_ = std::make_unique<gzifstream>(filename_);
19+
if (!gz_->is_open())
20+
throw std::ios_base::failure("Failed to open gzip file: " + filename_);
21+
rdbuf(gz_->rdbuf());
22+
} else {
23+
file_ = std::make_unique<std::ifstream>(filename_);
24+
if (!file_->is_open())
25+
throw std::ios_base::failure("Failed to open file: " + filename_);
26+
rdbuf(file_->rdbuf());
27+
}
28+
}
29+
30+
bool is_compressed() const { return is_gz_; }
31+
const std::string& filename() const { return filename_; }
32+
33+
private:
34+
const std::string filename_;
35+
bool is_gz_;
36+
std::unique_ptr<gzifstream> gz_;
37+
std::unique_ptr<std::ifstream> file_;
38+
};
39+
40+
class AutoGzipOutput : public std::ostream {
41+
public:
42+
explicit AutoGzipOutput(const std::string& filename)
43+
: std::ostream(nullptr), filename_(filename), is_gz_(isGzipName(filename)) {
44+
if (is_gz_) {
45+
gz_ = std::make_unique<gzofstream>(filename_);
46+
if (!gz_->is_open())
47+
throw std::ios_base::failure("Failed to open gzip file: " + filename_);
48+
rdbuf(gz_->rdbuf());
49+
} else {
50+
file_ = std::make_unique<std::ofstream>(filename_);
51+
if (!file_->is_open())
52+
throw std::ios_base::failure("Failed to open file: " + filename_);
53+
rdbuf(file_->rdbuf());
54+
}
55+
}
56+
57+
bool is_compressed() const { return is_gz_; }
58+
const std::string& filename() const { return filename_; }
59+
60+
private:
61+
const std::string filename_;
62+
bool is_gz_;
63+
std::unique_ptr<gzofstream> gz_;
64+
std::unique_ptr<std::ofstream> file_;
65+
};
66+
67+
#endif

src/GzipStreamBuf.cc

Lines changed: 0 additions & 96 deletions
This file was deleted.

src/GzipStreamBuf.hh

Lines changed: 0 additions & 52 deletions
This file was deleted.

0 commit comments

Comments
 (0)