Skip to content

Commit 8195fce

Browse files
authored
Merge pull request #185 from aldanor/feature/blosc-lzf-fix
Fix multiple filter problems and add tests
2 parents 81873ad + aefc45f commit 8195fce

File tree

5 files changed

+198
-55
lines changed

5 files changed

+198
-55
lines changed

CHANGELOG.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,18 @@
66

77
- `Error` now implements `From<Infallible>`, which allows passing convertible
88
extents (like tuples of integers) where `impl TryInto<Extents>` is required.
9-
- Support for HDF5 versions 1.12.1 and 1.10.8.
9+
- Support for HDF5 versions 1.12.1 and 1.10.8
10+
11+
### Changed
12+
13+
- Renamed `filters::gzip_available()` to `deflate_available()` (the old name is
14+
present but marked as deprecated).
15+
16+
### Fixed
17+
18+
- Fixed a bug where all blosc filter settings were discarded / zeroed out.
19+
- Fixed errors when recovering filter pipelines from stored datasets.
20+
- Fixed a bug where filter availability was computed incorrectly.
1021

1122
## 0.8.0
1223

src/hl/dataset.rs

Lines changed: 67 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,32 +1045,71 @@ impl<'d, T2: H5Type, D2: ndarray::Dimension> DatasetBuilderData<'d, T2, D2> {
10451045
impl_builder_methods!();
10461046
}
10471047

1048-
#[test]
1049-
fn test_compute_chunk_shape() {
1050-
let e = SimpleExtents::new(&[1, 1]);
1051-
assert_eq!(compute_chunk_shape(&e, 1), vec![1, 1]);
1052-
let e = SimpleExtents::new(&[1, 10]);
1053-
assert_eq!(compute_chunk_shape(&e, 3), vec![1, 3]);
1054-
let e = SimpleExtents::new(&[1, 10]);
1055-
assert_eq!(compute_chunk_shape(&e, 11), vec![1, 10]);
1056-
1057-
let e = SimpleExtents::new(&[Extent::from(1), Extent::from(10..)]);
1058-
assert_eq!(compute_chunk_shape(&e, 11), vec![1, 11]);
1059-
1060-
let e = SimpleExtents::new(&[Extent::from(1), Extent::from(10..)]);
1061-
assert_eq!(compute_chunk_shape(&e, 9), vec![1, 9]);
1062-
1063-
let e = SimpleExtents::new(&[4, 4, 4]);
1064-
// chunk shape should be greedy here, a minimal
1065-
// chunk shape would be (1, 3, 4) + (1, 1, 4)
1066-
assert_eq!(compute_chunk_shape(&e, 12), vec![1, 4, 4]);
1067-
1068-
let e = SimpleExtents::new(&[4, 4, 4]);
1069-
assert_eq!(compute_chunk_shape(&e, 100), vec![4, 4, 4]);
1070-
1071-
let e = SimpleExtents::new(&[4, 4, 4]);
1072-
assert_eq!(compute_chunk_shape(&e, 9), vec![1, 2, 4]);
1073-
1074-
let e = SimpleExtents::new(&[1, 1, 100]);
1075-
assert_eq!(compute_chunk_shape(&e, 51), vec![1, 1, 100]);
1048+
#[cfg(test)]
1049+
mod tests {
1050+
use super::{compute_chunk_shape, DatasetBuilder};
1051+
use crate::filters::Filter;
1052+
use crate::test::with_tmp_file;
1053+
use crate::{Extent, Result, SimpleExtents};
1054+
1055+
#[cfg(feature = "blosc")]
1056+
use crate::filters::{Blosc, BloscShuffle};
1057+
1058+
use ndarray::Array2;
1059+
1060+
#[allow(dead_code)]
1061+
fn check_filter(func: impl Fn(DatasetBuilder) -> DatasetBuilder, flt: Filter) {
1062+
let filters = vec![flt];
1063+
with_tmp_file::<Result<_>, _>(|file| {
1064+
let arr = Array2::<i64>::ones((1000, 20));
1065+
func(file.new_dataset_builder()).with_data(&arr).create("foo")?;
1066+
let ds = file.dataset("foo")?;
1067+
assert_eq!(ds.filters(), filters);
1068+
assert_eq!(ds.read_2d::<i64>()?, &arr);
1069+
Ok(())
1070+
})
1071+
.unwrap()
1072+
}
1073+
1074+
#[test]
1075+
#[cfg(feature = "blosc")]
1076+
fn test_blosc() {
1077+
check_filter(|d| d.blosc_zstd(9, true), Filter::Blosc(Blosc::ZStd, 9, BloscShuffle::Byte));
1078+
}
1079+
1080+
#[test]
1081+
#[cfg(feature = "lzf")]
1082+
fn test_lzf() {
1083+
check_filter(|d| d.lzf(), Filter::LZF);
1084+
}
1085+
1086+
#[test]
1087+
fn test_compute_chunk_shape() {
1088+
let e = SimpleExtents::new(&[1, 1]);
1089+
assert_eq!(compute_chunk_shape(&e, 1), vec![1, 1]);
1090+
let e = SimpleExtents::new(&[1, 10]);
1091+
assert_eq!(compute_chunk_shape(&e, 3), vec![1, 3]);
1092+
let e = SimpleExtents::new(&[1, 10]);
1093+
assert_eq!(compute_chunk_shape(&e, 11), vec![1, 10]);
1094+
1095+
let e = SimpleExtents::new(&[Extent::from(1), Extent::from(10..)]);
1096+
assert_eq!(compute_chunk_shape(&e, 11), vec![1, 11]);
1097+
1098+
let e = SimpleExtents::new(&[Extent::from(1), Extent::from(10..)]);
1099+
assert_eq!(compute_chunk_shape(&e, 9), vec![1, 9]);
1100+
1101+
let e = SimpleExtents::new(&[4, 4, 4]);
1102+
// chunk shape should be greedy here, a minimal
1103+
// chunk shape would be (1, 3, 4) + (1, 1, 4)
1104+
assert_eq!(compute_chunk_shape(&e, 12), vec![1, 4, 4]);
1105+
1106+
let e = SimpleExtents::new(&[4, 4, 4]);
1107+
assert_eq!(compute_chunk_shape(&e, 100), vec![4, 4, 4]);
1108+
1109+
let e = SimpleExtents::new(&[4, 4, 4]);
1110+
assert_eq!(compute_chunk_shape(&e, 9), vec![1, 2, 4]);
1111+
1112+
let e = SimpleExtents::new(&[1, 1, 100]);
1113+
assert_eq!(compute_chunk_shape(&e, 51), vec![1, 1, 100]);
1114+
}
10761115
}

src/hl/filters.rs

Lines changed: 113 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@ use hdf5_sys::h5p::{
77
};
88
use hdf5_sys::h5t::H5T_class_t;
99
use hdf5_sys::h5z::{
10-
H5Z_filter_t, H5Zfilter_avail, H5Zget_filter_info, H5Z_FILTER_CONFIG_DECODE_ENABLED,
10+
H5Zfilter_avail, H5Zget_filter_info, H5Z_FILTER_CONFIG_DECODE_ENABLED,
1111
H5Z_FILTER_CONFIG_ENCODE_ENABLED, H5Z_FILTER_DEFLATE, H5Z_FILTER_FLETCHER32, H5Z_FILTER_NBIT,
1212
H5Z_FILTER_SCALEOFFSET, H5Z_FILTER_SHUFFLE, H5Z_FILTER_SZIP, H5Z_FLAG_OPTIONAL,
1313
H5Z_SO_FLOAT_DSCALE, H5Z_SO_INT, H5_SZIP_EC_OPTION_MASK, H5_SZIP_MAX_PIXELS_PER_BLOCK,
1414
H5_SZIP_NN_OPTION_MASK,
1515
};
1616

17+
pub use hdf5_sys::h5z::H5Z_filter_t;
18+
1719
use crate::internal_prelude::*;
1820

1921
#[cfg(feature = "blosc")]
@@ -121,19 +123,26 @@ pub struct FilterInfo {
121123
pub(crate) fn register_filters() {
122124
#[cfg(feature = "lzf")]
123125
if let Err(e) = lzf::register_lzf() {
124-
eprintln!("{}", e);
126+
eprintln!("Error while registering LZF filter: {}", e);
125127
}
126128
#[cfg(feature = "blosc")]
127129
if let Err(e) = blosc::register_blosc() {
128-
eprintln!("{}", e);
130+
eprintln!("Error while registering Blosc filter: {}", e);
129131
}
130132
}
131133

132-
/// Returns `true` if gzip filter is available.
133-
pub fn gzip_available() -> bool {
134+
/// Returns `true` if deflate filter is available.
135+
pub fn deflate_available() -> bool {
134136
h5lock!(H5Zfilter_avail(H5Z_FILTER_DEFLATE) == 1)
135137
}
136138

139+
/// Returns `true` if deflate filter is available.
140+
#[doc(hidden)]
141+
#[deprecated(note = "deprecated; use deflate_available()")]
142+
pub fn gzip_available() -> bool {
143+
deflate_available()
144+
}
145+
137146
/// Returns `true` if szip filter is available.
138147
pub fn szip_available() -> bool {
139148
h5lock!(H5Zfilter_avail(H5Z_FILTER_SZIP) == 1)
@@ -167,7 +176,7 @@ impl Filter {
167176
}
168177

169178
pub fn get_info(filter_id: H5Z_filter_t) -> FilterInfo {
170-
if h5call!(H5Zfilter_avail(filter_id)).map(|x| x > 0).unwrap_or_default() {
179+
if !h5call!(H5Zfilter_avail(filter_id)).map(|x| x > 0).unwrap_or_default() {
171180
return FilterInfo::default();
172181
}
173182
let mut flags: c_uint = 0;
@@ -281,28 +290,25 @@ impl Filter {
281290
}
282291

283292
fn parse_deflate(cdata: &[c_uint]) -> Result<Self> {
284-
ensure!(cdata.len() == 1, "expected length 1 cdata for deflate filter");
293+
ensure!(!cdata.is_empty(), "expected cdata.len() >= 1 for deflate filter");
285294
ensure!(cdata[0] <= 9, "invalid deflate level: {}", cdata[0]);
286295
Ok(Self::deflate(cdata[0] as _))
287296
}
288297

289-
fn parse_shuffle(cdata: &[c_uint]) -> Result<Self> {
290-
ensure!(cdata.is_empty(), "expected length 0 cdata for shuffle filter");
298+
fn parse_shuffle(_cdata: &[c_uint]) -> Result<Self> {
291299
Ok(Self::shuffle())
292300
}
293301

294-
fn parse_fletcher32(cdata: &[c_uint]) -> Result<Self> {
295-
ensure!(cdata.is_empty(), "expected length 0 cdata for fletcher32 filter");
302+
fn parse_fletcher32(_cdata: &[c_uint]) -> Result<Self> {
296303
Ok(Self::fletcher32())
297304
}
298305

299-
fn parse_nbit(cdata: &[c_uint]) -> Result<Self> {
300-
ensure!(cdata.is_empty(), "expected length 0 cdata for nbit filter");
306+
fn parse_nbit(_cdata: &[c_uint]) -> Result<Self> {
301307
Ok(Self::nbit())
302308
}
303309

304310
fn parse_szip(cdata: &[c_uint]) -> Result<Self> {
305-
ensure!(cdata.len() == 2, "expected length 2 cdata for szip filter");
311+
ensure!(cdata.len() >= 2, "expected cdata.len() >= 2 for szip filter");
306312
let m = cdata[0];
307313
ensure!(
308314
(m & H5_SZIP_EC_OPTION_MASK != 0) != (m & H5_SZIP_NN_OPTION_MASK != 0),
@@ -321,7 +327,7 @@ impl Filter {
321327
}
322328

323329
fn parse_scaleoffset(cdata: &[c_uint]) -> Result<Self> {
324-
ensure!(cdata.len() == 2, "expected length 2 cdata for scaleoffset filter");
330+
ensure!(cdata.len() >= 2, "expected cdata.len() >= 2 for scaleoffset filter");
325331
let scale_type = cdata[0];
326332
let mode = if scale_type == (H5Z_SO_INT as c_uint) {
327333
ensure!(
@@ -344,8 +350,7 @@ impl Filter {
344350
}
345351

346352
#[cfg(feature = "lzf")]
347-
fn parse_lzf(cdata: &[c_uint]) -> Result<Self> {
348-
ensure!(cdata.is_empty(), "expected length 0 cdata for lzf filter");
353+
fn parse_lzf(_cdata: &[c_uint]) -> Result<Self> {
349354
Ok(Self::lzf())
350355
}
351356

@@ -506,9 +511,7 @@ impl Filter {
506511
ptr::null_mut(),
507512
));
508513
let cdata = &cd_values[..(cd_nelmts as _)];
509-
let flt = Self::from_raw(filter_id, cdata)
510-
.ok()
511-
.unwrap_or_else(|| Self::user(filter_id, cdata));
514+
let flt = Self::from_raw(filter_id, cdata)?;
512515
filters.push(flt);
513516
}
514517
Ok(filters)
@@ -523,6 +526,8 @@ pub(crate) fn validate_filters(filters: &[Filter], type_class: H5T_class_t) -> R
523526
let mut comp_filter: Option<&Filter> = None;
524527

525528
for filter in filters {
529+
ensure!(filter.is_available(), "Filter not available: {:?}", filter);
530+
526531
let id = filter.id();
527532

528533
if let Some(f) = map.get(&id) {
@@ -563,3 +568,91 @@ pub(crate) fn validate_filters(filters: &[Filter], type_class: H5T_class_t) -> R
563568

564569
Ok(())
565570
}
571+
572+
#[cfg(test)]
573+
mod tests {
574+
use hdf5_sys::h5t::H5T_class_t;
575+
576+
use super::{
577+
blosc_available, deflate_available, lzf_available, szip_available, validate_filters,
578+
Filter, FilterInfo, SZip, ScaleOffset,
579+
};
580+
use crate::test::with_tmp_file;
581+
use crate::{plist::DatasetCreate, Result};
582+
583+
#[test]
584+
fn test_filter_pipeline() -> Result<()> {
585+
let mut comp_filters = vec![];
586+
if deflate_available() {
587+
comp_filters.push(Filter::deflate(3));
588+
}
589+
if szip_available() {
590+
comp_filters.push(Filter::szip(SZip::Entropy, 8));
591+
}
592+
assert_eq!(cfg!(feature = "lzf"), lzf_available());
593+
#[cfg(feature = "lzf")]
594+
{
595+
comp_filters.push(Filter::lzf());
596+
}
597+
assert_eq!(cfg!(feature = "blosc"), blosc_available());
598+
#[cfg(feature = "blosc")]
599+
{
600+
use super::BloscShuffle;
601+
comp_filters.push(Filter::blosc_blosclz(1, false));
602+
comp_filters.push(Filter::blosc_lz4(3, true));
603+
comp_filters.push(Filter::blosc_lz4hc(5, BloscShuffle::Bit));
604+
comp_filters.push(Filter::blosc_zlib(7, BloscShuffle::None));
605+
comp_filters.push(Filter::blosc_zstd(9, BloscShuffle::Byte));
606+
comp_filters.push(Filter::blosc_snappy(0, BloscShuffle::Bit));
607+
}
608+
for c in &comp_filters {
609+
assert!(c.is_available());
610+
assert!(c.encode_enabled());
611+
assert!(c.decode_enabled());
612+
613+
let pipeline = vec![
614+
Filter::nbit(),
615+
Filter::shuffle(),
616+
c.clone(),
617+
Filter::fletcher32(),
618+
Filter::scale_offset(ScaleOffset::Integer(3)),
619+
];
620+
validate_filters(&pipeline, H5T_class_t::H5T_INTEGER)?;
621+
622+
let plist = DatasetCreate::try_new()?;
623+
for flt in &pipeline {
624+
flt.apply_to_plist(plist.id())?;
625+
}
626+
assert_eq!(Filter::extract_pipeline(plist.id())?, pipeline);
627+
628+
let mut b = DatasetCreate::build();
629+
b.set_filters(&pipeline);
630+
let plist = b.finish()?;
631+
assert_eq!(Filter::extract_pipeline(plist.id())?, pipeline);
632+
633+
let res = with_tmp_file(|file| {
634+
file.new_dataset_builder()
635+
.empty::<i32>()
636+
.shape((10_000, 20))
637+
.with_dcpl(|p| p.set_filters(&pipeline))
638+
.create("foo")
639+
.unwrap();
640+
let plist = file.dataset("foo").unwrap().dcpl().unwrap();
641+
Filter::extract_pipeline(plist.id()).unwrap()
642+
});
643+
assert_eq!(res, pipeline);
644+
}
645+
646+
let bad_filter = Filter::user(12_345, &[1, 2, 3, 4, 5]);
647+
assert_eq!(Filter::get_info(bad_filter.id()), FilterInfo::default());
648+
assert!(!bad_filter.is_available());
649+
assert!(!bad_filter.encode_enabled());
650+
assert!(!bad_filter.decode_enabled());
651+
assert_err!(
652+
validate_filters(&[bad_filter], H5T_class_t::H5T_INTEGER),
653+
"Filter not available"
654+
);
655+
656+
Ok(())
657+
}
658+
}

src/hl/filters/blosc.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ pub fn register_blosc() -> Result<(), &'static str> {
5656
extern "C" fn set_local_blosc(dcpl_id: hid_t, type_id: hid_t, _space_id: hid_t) -> herr_t {
5757
const MAX_NDIMS: usize = 32;
5858
let mut flags: c_uint = 0;
59-
let mut nelmts: size_t = 0;
59+
let mut nelmts: size_t = 8;
6060
let mut values: Vec<c_uint> = vec![0; 8];
6161
let ret = unsafe {
6262
H5Pget_filter_by_id2(

src/test.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,19 @@ use tempfile::tempdir;
44

55
use crate::internal_prelude::*;
66

7-
pub fn with_tmp_dir<F: Fn(PathBuf)>(func: F) {
7+
pub fn with_tmp_dir<T, F: Fn(PathBuf) -> T>(func: F) -> T {
88
let dir = tempdir().unwrap();
99
let path = dir.path().to_path_buf();
10-
func(path);
10+
func(path)
1111
}
1212

13-
pub fn with_tmp_path<F: Fn(PathBuf)>(func: F) {
13+
pub fn with_tmp_path<T, F: Fn(PathBuf) -> T>(func: F) -> T {
1414
with_tmp_dir(|dir| func(dir.join("foo.h5")))
1515
}
1616

17-
pub fn with_tmp_file<F: Fn(File)>(func: F) {
17+
pub fn with_tmp_file<T, F: Fn(File) -> T>(func: F) -> T {
1818
with_tmp_path(|path| {
1919
let file = File::create(&path).unwrap();
20-
func(file);
20+
func(file)
2121
})
2222
}

0 commit comments

Comments
 (0)