Skip to content

Commit be22a2a

Browse files
committed
Release 1.8
2 parents 209f94b + 107e7d1 commit be22a2a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+2869
-588
lines changed

.appveyor.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ build_script:
3030
- set HOME=.
3131
- set MSYSTEM=MINGW64
3232
- set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%PATH%
33-
- "sh -lc \"aclocal && autoheader && autoconf && ./configure && make -j2\""
33+
- "sh -lc \"aclocal && autoheader && autoconf && ./configure CFLAGS='-Wno-format -g -O2' && make -j2\""
3434

3535
#build_script:
3636
# - make

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*.cram -text diff=cram
88

99
# Omit these files from release tarballs.
10+
/.appveyor.yml export-ignore
1011
.git* export-ignore
1112
/.travis.yml export-ignore
1213
README.md export-ignore

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ lib*.so.*
4343
/test/test-bcf-sr
4444
/test/test-bcf-translate
4545
/test/test_bgzf
46+
/test/test_realn
4647
/test/test-regidx
4748
/test/test-vcf-api
4849
/test/test-vcf-sweep

.travis.yml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@ env:
1313
- USE_CONFIG=no
1414
- USE_CONFIG=yes
1515

16+
matrix:
17+
include:
18+
- compiler: gcc
19+
os: linux
20+
env: USE_CONFIG=yes USE_LIBDEFLATE=yes
21+
- compiler: clang
22+
os: osx
23+
env: USE_CONFIG=yes USE_LIBDEFLATE=yes
24+
1625
# For linux systems
1726
addons:
1827
apt:
@@ -24,5 +33,9 @@ addons:
2433
before_install:
2534
- if [[ "$TRAVIS_OS_NAME" == "osx" && "$USE_CONFIG" == "no" ]]; then HOMEBREW_NO_AUTO_UPDATE=1 brew install xz || ( brew update && brew install xz ); fi
2635

36+
before_script:
37+
- if test "x$USE_LIBDEFLATE" = "xyes" ; then ( cd "$HOME" && git clone --depth 1 https://github.com/ebiggers/libdeflate.git && cd libdeflate && make -j 2 CFLAGS='-fPIC -O3' libdeflate.a ); fi
38+
2739
script:
28-
- if test "$USE_CONFIG" = "yes" ; then autoreconf && ./configure ; fi && make -e && make test
40+
- if test "x$USE_LIBDEFLATE" = "xyes" ; then CONFIG_OPTS='CPPFLAGS="-I$HOME/libdeflate" LDFLAGS="-L$HOME/libdeflate" --with-libdeflate' ; else CONFIG_OPTS='--without-libdeflate' ; fi
41+
- if test "$USE_CONFIG" = "yes" ; then autoreconf && eval ./configure $CONFIG_OPTS || { cat config.log ; false ; } ; fi && make -j 2 -e && make test

INSTALL

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,12 @@ various features and specify further optional external requirements:
149149
by default. It can be disabled with --disable-lzma, but be aware
150150
that not all CRAM files may be possible to decode.
151151

152+
--with-libdeflate
153+
Libdeflate is a heavily optimized library for DEFLATE-based compression
154+
and decompression. It also includes a fast crc32 implementation.
155+
By default, ./configure will probe for libdeflate and use it if
156+
available. To prevent this, use --without-libdeflate.
157+
152158
The configure script also accepts the usual options and environment variables
153159
for tuning installation locations and compilers: type './configure --help'
154160
for details. For example,
@@ -158,6 +164,16 @@ for details. For example,
158164
would specify that HTSlib is to be built with icc and installed into bin,
159165
lib, etc subdirectories under /opt/icc-compiled.
160166

167+
If dependencies have been installed in non-standard locations (i.e. not on
168+
the normal include and library search paths) then the CPPFLAGS and LDFLAGS
169+
environment variables can be used to set the options needed to find them.
170+
For example, NetBSD users may use:
171+
172+
./configure CPPFLAGS=-I/usr/pkg/include \
173+
LDFLAGS='-L/usr/pkg/lib -Wl,-R/usr/pkg/lib'
174+
175+
to allow compiling and linking against dependencies installed via the ports
176+
collection.
161177

162178
Installation Locations
163179
======================

Makefile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ BUILT_TEST_PROGRAMS = \
7272
test/hfile \
7373
test/sam \
7474
test/test_bgzf \
75+
test/test_realn \
7576
test/test-regidx \
7677
test/test_view \
7778
test/test-vcf-api \
@@ -377,6 +378,9 @@ test/sam: test/sam.o libhts.a
377378
test/test_bgzf: test/test_bgzf.o libhts.a
378379
$(CC) $(LDFLAGS) -o $@ test/test_bgzf.o libhts.a -lz $(LIBS) -lpthread
379380

381+
test/test_realn: test/test_realn.o libhts.a
382+
$(CC) $(LDFLAGS) -o $@ test/test_realn.o libhts.a $(LIBS) -lpthread
383+
380384
test/test-regidx: test/test-regidx.o libhts.a
381385
$(CC) $(LDFLAGS) -o $@ test/test-regidx.o libhts.a $(LIBS) -lpthread
382386

@@ -400,6 +404,7 @@ test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h)
400404
test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h)
401405
test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h)
402406
test/test_bgzf.o: test/test_bgzf.c $(htslib_bgzf_h) $(htslib_hfile_h)
407+
test/test_realn.o: test/test_realn.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h)
403408
test/test-regidx.o: test/test-regidx.c config.h $(htslib_regidx_h) $(hts_internal_h)
404409
test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h)
405410
test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h)
@@ -434,7 +439,7 @@ install: libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) installdirs install-$(SHLIB
434439
if test -n "$(BUILT_PLUGINS)"; then $(INSTALL_PROGRAM) $(BUILT_PLUGINS) $(DESTDIR)$(plugindir); fi
435440
$(INSTALL_DATA) htslib/*.h $(DESTDIR)$(includedir)/htslib
436441
$(INSTALL_DATA) libhts.a $(DESTDIR)$(libdir)/libhts.a
437-
$(INSTALL_MAN) htsfile.1 tabix.1 $(DESTDIR)$(man1dir)
442+
$(INSTALL_MAN) bgzip.1 htsfile.1 tabix.1 $(DESTDIR)$(man1dir)
438443
$(INSTALL_MAN) faidx.5 sam.5 vcf.5 $(DESTDIR)$(man5dir)
439444

440445
installdirs:

NEWS

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,51 @@
1+
Noteworthy changes in release 1.8 (3rd April 2018)
2+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3+
4+
* The URL to get sequences from the EBI reference server has been changed
5+
to https://. This is because the EBI no longer serve sequences via
6+
plain HTTP - requests to the http:// endpoint just get redirected.
7+
HTSlib needs to be linked against libcurl to download https:// URLs,
8+
so CRAM users who want to get references from the EBI will need to
9+
run configure and ensure libcurl support is enabled using the
10+
--enable-libcurl option.
11+
12+
* Added libdeflate as a build option for alternative faster compression and
13+
decompression. Results vary by CPU but compression should be twice as fast
14+
and decompression faster.
15+
16+
* It is now possible to set the compression level in bgzip. (#675; thanks
17+
to Nathan Weeks).
18+
19+
* bgzip now gets its own manual page.
20+
21+
* CRAM encoding now stores MD and NM tags verbatim where the reference
22+
contains 'N' characters, to work around ambiguities in the SAM
23+
specification (samtools #717/762).
24+
Also added "store_md" and "store_nm" cram-options for forcing these
25+
tags to be stored at all locations. This is best when combined with
26+
a subsequent decode_md=0 option while reading CRAM.
27+
28+
* Multiple CRAM bug fixes, including a fix to the freeing and subsequent reuse of
29+
references with `-T ref.fa`. (#654; reported by Chris Saunders)
30+
31+
* CRAM multi-threading bugs fixed: don't try to call flush on reading;
32+
processing of multiple range queries; problems with multi-slice containers.
33+
34+
* Fixed crashes caused when decoding some cramtools produced CRAM files.
35+
36+
* Fixed a couple of minor rANS issues with handling invalid data.
37+
38+
* Fixed bug where probaln_glocal() tried to allocate far more memory than
39+
needed when the query sequence was much longer than the reference. This
40+
caused crashes in samtools and bcftools mpileup when used on data with very
41+
long reads. (#572, problem reported by Felix Bemm via minimap2).
42+
43+
* sam_prop_realn() now returns -1 (the same value as for unmapped reads)
44+
on reads that do not include at least one 'M', 'X' or '=' CIGAR operator,
45+
and no longer adds BQ or ZQ tags. BAQ adjustments are only made to bases
46+
covered by these operators so there is no point in trying to align
47+
reads that do not have them. (#572)
48+
149
Noteworthy changes in release 1.7 (26th January 2018)
250
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
351

bcf_sr_sort.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -628,10 +628,16 @@ int bcf_sr_sort_next(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, int mi
628628
}
629629
void bcf_sr_sort_remove_reader(bcf_srs_t *readers, sr_sort_t *srt, int i)
630630
{
631-
free(srt->vcf_buf[i].rec);
632-
if ( i+1 < srt->nsr )
633-
memmove(&srt->vcf_buf[i], &srt->vcf_buf[i+1], (srt->nsr - i - 1)*sizeof(vcf_buf_t));
634-
memset(srt->vcf_buf + srt->nsr - 1, 0, sizeof(vcf_buf_t));
631+
//vcf_buf is allocated only in bcf_sr_sort_next
632+
//So, a call to bcf_sr_add_reader() followed immediately by bcf_sr_remove_reader()
633+
//would cause the program to crash in this segment
634+
if (srt->vcf_buf)
635+
{
636+
free(srt->vcf_buf[i].rec);
637+
if ( i+1 < srt->nsr )
638+
memmove(&srt->vcf_buf[i], &srt->vcf_buf[i+1], (srt->nsr - i - 1)*sizeof(vcf_buf_t));
639+
memset(srt->vcf_buf + srt->nsr - 1, 0, sizeof(vcf_buf_t));
640+
}
635641
}
636642
sr_sort_t *bcf_sr_sort_init(sr_sort_t *srt)
637643
{

bgzf.c

Lines changed: 97 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@
3535
#include <sys/types.h>
3636
#include <inttypes.h>
3737

38+
#ifdef HAVE_LIBDEFLATE
39+
#include <libdeflate.h>
40+
#endif
41+
3842
#include "htslib/hts.h"
3943
#include "htslib/bgzf.h"
4044
#include "htslib/hfile.h"
@@ -359,6 +363,64 @@ BGZF *bgzf_hopen(hFILE *hfp, const char *mode)
359363
return fp;
360364
}
361365

366+
#ifdef HAVE_LIBDEFLATE
367+
int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level)
368+
{
369+
if (slen == 0) {
370+
// EOF block
371+
if (*dlen < 28) return -1;
372+
memcpy(_dst, "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0", 28);
373+
*dlen = 28;
374+
return 0;
375+
}
376+
377+
uint8_t *dst = (uint8_t*)_dst;
378+
379+
if (level == 0) {
380+
// Uncompressed data
381+
if (*dlen < slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH) return -1;
382+
dst[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951
383+
u16_to_le(slen, &dst[BLOCK_HEADER_LENGTH+1]); // length
384+
u16_to_le(~slen, &dst[BLOCK_HEADER_LENGTH+3]); // ones-complement length
385+
memcpy(dst + BLOCK_HEADER_LENGTH+5, src, slen);
386+
*dlen = slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
387+
388+
} else {
389+
level = level > 0 ? level : 6; // libdeflate doesn't honour -1 as default
390+
// NB levels go up to 12 here.
391+
struct libdeflate_compressor *z = libdeflate_alloc_compressor(level);
392+
if (!z) return -1;
393+
394+
// Raw deflate
395+
size_t clen =
396+
libdeflate_deflate_compress(z, src, slen,
397+
dst + BLOCK_HEADER_LENGTH,
398+
*dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH);
399+
400+
if (clen <= 0) {
401+
hts_log_error("Call to libdeflate_deflate_compress failed");
402+
libdeflate_free_compressor(z);
403+
return -1;
404+
}
405+
406+
*dlen = clen + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
407+
408+
libdeflate_free_compressor(z);
409+
}
410+
411+
// write the header
412+
memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block
413+
packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes
414+
415+
// write the footer
416+
uint32_t crc = libdeflate_crc32(0, src, slen);
417+
packInt32((uint8_t*)&dst[*dlen - 8], crc);
418+
packInt32((uint8_t*)&dst[*dlen - 4], slen);
419+
return 0;
420+
}
421+
422+
#else
423+
362424
int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level)
363425
{
364426
uint32_t crc;
@@ -395,6 +457,7 @@ int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int le
395457
packInt32((uint8_t*)&dst[*dlen - 4], slen);
396458
return 0;
397459
}
460+
#endif // HAVE_LIBDEFLATE
398461

399462
static int bgzf_gzip_compress(BGZF *fp, void *_dst, size_t *dlen, const void *src, size_t slen, int level)
400463
{
@@ -438,6 +501,28 @@ static int deflate_block(BGZF *fp, int block_length)
438501
return comp_size;
439502
}
440503

504+
#ifdef HAVE_LIBDEFLATE
505+
506+
static int bgzf_uncompress(uint8_t *dst, size_t *dlen, const uint8_t *src, size_t slen) {
507+
struct libdeflate_decompressor *z = libdeflate_alloc_decompressor();
508+
if (!z) {
509+
hts_log_error("Call to libdeflate_alloc_decompressor failed");
510+
return -1;
511+
}
512+
513+
int ret = libdeflate_deflate_decompress(z, src, slen, dst, *dlen, dlen);
514+
libdeflate_free_decompressor(z);
515+
516+
if (ret != LIBDEFLATE_SUCCESS) {
517+
hts_log_error("Inflate operation failed: %d", ret);
518+
return -1;
519+
}
520+
521+
return 0;
522+
}
523+
524+
#else
525+
441526
static int bgzf_uncompress(uint8_t *dst, size_t *dlen, const uint8_t *src, size_t slen) {
442527
z_stream zs;
443528
zs.zalloc = NULL;
@@ -467,6 +552,7 @@ static int bgzf_uncompress(uint8_t *dst, size_t *dlen, const uint8_t *src, size_
467552
*dlen = *dlen - zs.avail_out;
468553
return 0;
469554
}
555+
#endif // HAVE_LIBDEFLATE
470556

471557
// Inflate the block in fp->compressed_block into fp->uncompressed_block
472558
static int inflate_block(BGZF* fp, int block_length)
@@ -482,7 +568,11 @@ static int inflate_block(BGZF* fp, int block_length)
482568
// Check CRC of uncompressed block matches the gzip header.
483569
// NB: we may wish to switch out the zlib crc32 for something more performant.
484570
// See PR#361 and issue#467
571+
#ifdef HAVE_LIBDEFLATE
572+
uint32_t c1 = libdeflate_crc32(0L, (unsigned char *)fp->uncompressed_block, dlen);
573+
#else
485574
uint32_t c1 = crc32(0L, (unsigned char *)fp->uncompressed_block, dlen);
575+
#endif
486576
uint32_t c2 = le_to_u32((uint8_t *)fp->compressed_block + block_length-8);
487577
if (c1 != c2) {
488578
fp->errcode |= BGZF_ERR_CRC;
@@ -1160,7 +1250,7 @@ static void *bgzf_mt_reader(void *vp) {
11601250
pthread_cond_signal(&mt->command_c);
11611251
pthread_mutex_unlock(&mt->command_m);
11621252
hts_tpool_process_destroy(mt->out_queue);
1163-
pthread_exit(NULL);
1253+
return NULL;
11641254

11651255
default:
11661256
break;
@@ -1182,7 +1272,7 @@ static void *bgzf_mt_reader(void *vp) {
11821272
// We tear down the multi-threaded decoder and revert to the old code.
11831273
hts_tpool_dispatch(mt->pool, mt->out_queue, bgzf_nul_func, j);
11841274
hts_tpool_process_ref_decr(mt->out_queue);
1185-
pthread_exit(&j->errcode);
1275+
return &j->errcode;
11861276
}
11871277

11881278
// Dispatch an empty block so EOF is spotted.
@@ -1193,7 +1283,7 @@ static void *bgzf_mt_reader(void *vp) {
11931283
hts_tpool_dispatch(mt->pool, mt->out_queue, bgzf_nul_func, j);
11941284
if (j->errcode != 0) {
11951285
hts_tpool_process_destroy(mt->out_queue);
1196-
pthread_exit(&j->errcode);
1286+
return &j->errcode;
11971287
}
11981288

11991289
// We hit EOF so can stop reading, but we may get a subsequent
@@ -1224,10 +1314,9 @@ static void *bgzf_mt_reader(void *vp) {
12241314
pthread_cond_signal(&mt->command_c);
12251315
pthread_mutex_unlock(&mt->command_m);
12261316
hts_tpool_process_destroy(mt->out_queue);
1227-
pthread_exit(NULL);
1317+
return NULL;
12281318
}
12291319
}
1230-
return NULL;
12311320
}
12321321

12331322
int bgzf_thread_pool(BGZF *fp, hts_tpool *pool, int qsize) {
@@ -1452,7 +1541,7 @@ ssize_t bgzf_block_write(BGZF *fp, const void *data, size_t length)
14521541
uint64_t ublock_size; // amount of uncompressed data to be fed into next block
14531542
while (remaining > 0) {
14541543
current_block = fp->idx->moffs - fp->idx->noffs;
1455-
ublock_size = fp->idx->offs[current_block+1].uaddr-fp->idx->offs[current_block].uaddr;
1544+
ublock_size = current_block + 1 < fp->idx->moffs ? fp->idx->offs[current_block+1].uaddr-fp->idx->offs[current_block].uaddr : BGZF_MAX_BLOCK_SIZE;
14561545
uint8_t* buffer = (uint8_t*)fp->uncompressed_block;
14571546
int copy_length = ublock_size - fp->block_offset;
14581547
if (copy_length > remaining) copy_length = remaining;
@@ -1462,7 +1551,8 @@ ssize_t bgzf_block_write(BGZF *fp, const void *data, size_t length)
14621551
remaining -= copy_length;
14631552
if (fp->block_offset == ublock_size) {
14641553
if (lazy_flush(fp) != 0) return -1;
1465-
fp->idx->noffs--; // decrement noffs to track the blocks
1554+
if (fp->idx->noffs > 0)
1555+
fp->idx->noffs--; // decrement noffs to track the blocks
14661556
}
14671557
}
14681558
return length - remaining;

0 commit comments

Comments
 (0)