# Chunk-size bounds (in bytes) used when adjusting dataset chunk layouts.
MIN_CHUNK_SIZE = 1 * 1024 * 1024  # 1 MiB
# NOTE(review): name is likely a typo for "MAX_CHUNK_SIZE" — confirm before
# renaming, since other modules may reference it by this spelling.
MAC_CHUNK_SIZE = 8 * 1024 * 1024  # 8 MiB

# Map of registered HDF5 dynamic-filter ids to the compressor / shuffle
# names used below when translating dataset filters.  Only ids present
# here are forwarded; all others are warned about and skipped.
H5Z_FILTER_MAP = {
    32001: "blosclz",
    32004: "lz4",
    32008: "bitshuffle",
    32015: "zstd",
}
36+
37+
# Check whether the linked HDF5 library supports chunk iteration.
# Supported in 1.14.0 and later, and backported to the 1.10 series at
# 1.10.10 — but absent from the 1.12.x releases in between, hence the
# split range check below.
hdf_library_version = h5py.version.hdf5_version_tuple
library_has_chunk_iter = hdf_library_version >= (1, 14, 0) or (
    (1, 10, 10) <= hdf_library_version < (1, 12, 0)
)
@@ -761,8 +768,8 @@ def create_chunktable(dset, dset_dims, ctx):
761768 chunk_key += str (index [dim ] // chunk_dims [dim ])
762769 if dim < rank - 1 :
763770 chunk_key += "_"
764- logging .debug (f"adding chunk_key: { chunk_key } " )
765- chunk_map [chunk_key ] = (chunk_info .byte_offset , chunk_info .size )
771+ logging .debug (f"adding chunk_key: { chunk_key } " )
772+ chunk_map [chunk_key ] = (chunk_info .byte_offset , chunk_info .size )
766773
767774 chunks ["class" ] = "H5D_CHUNKED_REF"
768775 if not extend :
@@ -1121,6 +1128,7 @@ def create_dataset(dobj, ctx):
11211128 # or vlen
11221129 pass
11231130 else :
1131+ logging .debug (f"filter setup for { dobj .name } " )
11241132 if not ctx ["ignorefilters" ]:
11251133 kwargs ["compression" ] = dobj .compression
11261134 kwargs ["compression_opts" ] = dobj .compression_opts
@@ -1134,7 +1142,7 @@ def create_dataset(dobj, ctx):
11341142
11351143 # TBD: it would be better if HSDS could let us know what filters
11361144 # are supported (like it does with compressors)
1137- # For now, just hard-code fletcher32 and scaleoffset to be ignored
1145+ # For now, just hard-code fletcher32 and scaleoffset to be ignored
11381146 if dobj .fletcher32 :
11391147 msg = f"fletcher32 filter used by dataset: { dobj .name } is not "
11401148 msg += "supported by HSDS, this filter will not be used"
@@ -1144,7 +1152,35 @@ def create_dataset(dobj, ctx):
11441152 msg = f"scaleoffset filter used by dataset: { dobj .name } is not "
11451153 msg += "supported by HSDS, this filter will not be used"
11461154 logging .warning (msg )
1147- # kwargs["scaleoffset"] = dobj.scaleoffset
1155+
1156+ if is_h5py (dobj ) and not kwargs .get ("compression" ):
1157+ # apply any custom filters as long as they are supported in HSDS
1158+ for filter_id in dobj ._filters :
1159+ filter_opts = dobj ._filters [filter_id ]
1160+ try :
1161+ filter_id = int (filter_id )
1162+ except ValueError :
1163+ msg = f"unrecognized filter id: {filter_id} for {dobj.name}, ignoring"
1164+ logging .warning (msg )
1165+
1166+ if not isinstance (filter_id , int ):
1167+ continue
1168+
1169+ if filter_id in H5Z_FILTER_MAP :
1170+ filter_name = H5Z_FILTER_MAP [filter_id ]
1171+ if filter_name == "bitshuffle" :
1172+ kwargs ["shuffle" ] = filter_name
1173+ logging .info (f"using bitshuffle on { dobj .name } " )
1174+ else :
1175+ # supported non-standard compressor
1176+ kwargs ["compression" ] = filter_name
1177+ logging .info (f"using compressor: { filter_name } for { dobj .name } " )
1178+ kwargs ["compression_opts" ] = filter_opts
1179+ logging .info (f"compression_opts: { filter_opts } " )
1180+ else :
1181+ logging .warning (f"filter id { filter_id } for { dobj .name } not supported" )
1182+
1183+ # kwargs["scaleoffset"] = dobj.scaleoffset
11481184 # setting the fillvalue is failing in some cases
11491185 # see: https://github.com/HDFGroup/h5pyd/issues/119
11501186 # don't set fill value for reference types
@@ -1501,14 +1537,15 @@ def load_file(
15011537
15021538 logging .info (f"input file: { fin .filename } " )
15031539 logging .info (f"output file: { fout .filename } " )
1540+ logging .info (f"dataload: { dataload } " )
15041541 if dataload != "ingest" :
15051542 if not dataload :
15061543 logging .info ("no data load" )
15071544 elif dataload in ("link" , "fastlink" ):
15081545 if not s3path :
15091546 logging .error ("s3path expected to be set" )
15101547 sys .exit (1 )
1511- logging .info ("using s3path" )
1548+ logging.info(f"using s3path: {s3path}")
15121549 else :
15131550 logging .error (f"unexpected dataload value: { dataload } " )
15141551 sys .exit (1 )
0 commit comments