Skip to content

Commit 0ebe6fa

Browse files
authored
fix bug in compressed format (#116)
* fix bug in compressed format * update pysz to 1.0.3
1 parent a32faab commit 0ebe6fa

File tree

21 files changed

+501
-695
lines changed

21 files changed

+501
-695
lines changed

CMakeLists.txt

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
cmake_minimum_required(VERSION 3.18)
2-
project(SZ3 VERSION 3.3.1)
2+
project(SZ3 VERSION 3.3.2)
33

44
#data version defines the version of the compressed data format
55
#it is not always equal to the program version (e.g., SZ3 v3.1.0 and SZ3 v3.1.1 may use the same data version v3.1.0)
66
#only update data version if the new version of the program changes compressed data format
7-
set(SZ3_DATA_VERSION 3.3.0)
7+
set(SZ3_DATA_VERSION 3.3.2)
88

99
include(GNUInstallDirs)
1010

@@ -47,12 +47,6 @@ if (OpenMP_FOUND)
4747
target_link_libraries(${PROJECT_NAME} INTERFACE OpenMP::OpenMP_CXX)
4848
endif ()
4949

50-
if (MSVC)
51-
option(SZ3_USE_BUNDLED_ZSTD "prefer the bundled version of Zstd" ON)
52-
else ()
53-
option(SZ3_USE_BUNDLED_ZSTD "prefer the bundled version of Zstd" OFF)
54-
endif ()
55-
5650
option(SZ3_DEBUG_TIMINGS "print debug timing information" OFF)
5751
if (SZ3_DEBUG_TIMINGS)
5852
target_compile_definitions(${PROJECT_NAME} INTERFACE SZ3_DEBUG_TIMINGS=1)
@@ -67,6 +61,11 @@ else ()
6761
target_compile_definitions(${PROJECT_NAME} INTERFACE SZ3_USE_SKA_HASH=0)
6862
endif ()
6963

64+
if (MSVC)
65+
option(SZ3_USE_BUNDLED_ZSTD "prefer the bundled version of Zstd" ON)
66+
else ()
67+
option(SZ3_USE_BUNDLED_ZSTD "prefer the bundled version of Zstd" OFF)
68+
endif ()
7069
pkg_search_module(ZSTD IMPORTED_TARGET libzstd)
7170
if (ZSTD_FOUND AND NOT SZ3_USE_BUNDLED_ZSTD)
7271
target_link_libraries(${PROJECT_NAME} INTERFACE PkgConfig::ZSTD)

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ Version New features
7878
* SZ 3.2.0 API reconstructed for FZ. H5Z-SZ3 rewrite. Compression version checking.
7979
* SZ 3.3.0 Add key QoZ v1 and v2 features to improve compression speed and data quality. The full QoZ is available from **a separate branch** (https://github.com/szcompressor/SZ3/tree/QoZ).
8080
* SZ 3.3.1: SZ3 Windows support for both Visual Studio and MinGW toolchains. pySZ v1 released and available via `pip install pysz`. Bio algorithms added.
81+
* SZ 3.3.2: bugfix for compressed format.
8182

8283
## 3rd party libraries/tools
8384
* [Zstandard](https://facebook.github.io/zstd/) v1.4.5 will be fetched if libzstd cannot be found by pkg-config.

include/SZ3/api/sz.hpp

Lines changed: 96 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -1,104 +1,97 @@
1+
/**
2+
* @file sz.hpp
3+
* @brief SZ3 compression and decompression API.
4+
*
5+
* This header provides the main API functions for compressing and decompressing data using SZ3.
6+
*
7+
* Compressed Data Format of SZ3:
8+
* The compressed data is always stored in little-endian order.
9+
* The compressed data consists of three main sections:
10+
* 1. Header (16 bytes): Contains metadata about the compressed data.
11+
* - Magic Number (4 bytes): Identifies the data as SZ3-compressed.
12+
* - Version (4 bytes): Indicates the version of the SZ3 format.
13+
* - Compressed Size (8 bytes): Specifies the size of the compressed payload.
14+
* 2. Compressed Payload: The actual compressed data.
15+
* 3. Configuration: Stores the compression configuration used.
16+
*
17+
* The layout can be visualized as follows:
18+
* [ Header (16 bytes) | Compressed Payload | Configuration ]
19+
*/
20+
121
#ifndef SZ3_SZ_HPP
222
#define SZ3_SZ_HPP
323

424
#include "SZ3/api/impl/SZImpl.hpp"
525
#include "SZ3/version.hpp"
626

27+
728
/**
8-
* API for compression
9-
* @tparam T source data type
10-
* @param config compression configuration. Please update the config with 1). data dimension and shape and 2). desired
11-
settings.
12-
* @param data source data
13-
* @param cmpData pre-allocated buffer for compressed data
14-
* @param cmpCap pre-allocated buffer size (in bytes) for compressed data
15-
* @return compressed data size (in bytes)
16-
17-
The compression algorithms are:
18-
ALGO_INTERP_LORENZO:
19-
The default algorithm in SZ3. It is the implementation of our ICDE'21 paper.
20-
The whole dataset will be compressed by interpolation or lorenzo predictor with auto-optimized settings.
21-
ALGO_INTERP:
22-
The whole dataset will be compressed by interpolation predictor with default settings.
23-
ALGO_LORENZO_REG:
24-
The whole dataset will be compressed by lorenzo and/or regression based predictors block by block with default
25-
settings. The four predictors ( 1st-order lorenzo, 2nd-order lorenzo, 1st-order regression, 2nd-order regression) can be
26-
enabled or disabled independently by conf settings (lorenzo, lorenzo2, regression, regression2).
27-
28-
Interpolation+lorenzo example:
29-
SZ3::Config conf(100, 200, 300); // 300 is the fastest dimension
30-
conf.cmprAlgo = SZ3::ALGO_INTERP_LORENZO;
31-
conf.errorBoundMode = SZ3::EB_ABS; // refer to def.hpp for all supported error bound mode
32-
conf.absErrorBound = 1E-3; // absolute error bound 1e-3
33-
char *compressedData = SZ_compress(conf, data, outSize);
34-
35-
Interpolation example:
36-
SZ3::Config conf(100, 200, 300); // 300 is the fastest dimension
37-
conf.cmprAlgo = SZ3::ALGO_INTERP;
38-
conf.errorBoundMode = SZ3::EB_REL; // refer to def.hpp for all supported error bound mode
39-
conf.relErrorBound = 1E-3; // value-rang-based error bound 1e-3
40-
char *compressedData = SZ_compress(conf, data, outSize);
41-
42-
Lorenzo/regression example :
43-
SZ3::Config conf(100, 200, 300); // 300 is the fastest dimension
44-
conf.cmprAlgo = SZ3::ALGO_LORENZO_REG;
45-
conf.lorenzo = true; // only use 1st order lorenzo
46-
conf.lorenzo2 = false;
47-
conf.regression = false;
48-
conf.regression2 = false;
49-
conf.errorBoundMode = SZ3::EB_ABS; // refer to def.hpp for all supported error bound mode
50-
conf.absErrorBound = 1E-3; // absolute error bound 1e-3
51-
char *compressedData = SZ_compress(conf, data, outSize);
29+
* Compresses the input data using the provided configuration and stores the result in a pre-allocated buffer.
30+
* @tparam T The data type of the source data.
31+
* @param config The compression configuration.
32+
* @param data Pointer to the source data array.
33+
* @param cmpData Pointer to the pre-allocated buffer for compressed data.
34+
* @param cmpCap The size of the pre-allocated buffer in bytes.
35+
* @return The size of the compressed data in bytes.
36+
* @example
37+
* SZ3::Config conf(100, 200, 300); // 300 is the fastest dimension
38+
* conf.errorBoundMode = SZ3::EB_ABS; // Refer to def.hpp for supported error bound modes
39+
* conf.absErrorBound = 1E-3; // Absolute error bound of 1e-3
40+
* size_t outSize = SZ_compress(conf, data, outBuff, outBuffCap);
5241
*/
5342
template <class T>
54-
size_t SZ_compress(const SZ3::Config &config, const T *data, char *cmpData, size_t cmpCap) {
43+
size_t SZ_compress(const SZ3::Config& config, const T* data, char* cmpData, size_t cmpCap) {
5544
using namespace SZ3;
5645
Config conf(config);
5746

5847
if (cmpCap < SZ_compress_size_bound<T>(conf)) {
5948
throw std::invalid_argument(SZ3_ERROR_COMP_BUFFER_NOT_LARGE_ENOUGH);
6049
}
6150

62-
auto confEstSize = conf.size_est();
63-
auto cmpDataPos = reinterpret_cast<uchar *>(cmpData) + confEstSize;
64-
memset(cmpData, 0, confEstSize);
65-
auto cmpDataCap = cmpCap - conf.size_est();
51+
auto cmpDataPos = reinterpret_cast<uchar*>(cmpData);
6652

67-
size_t cmpDataLen = 0;
53+
// save 16 bytes header
54+
write(config.sz3MagicNumber, cmpDataPos); // magic number (4 bytes)
55+
write(config.sz3DataVer, cmpDataPos); // data version (4 bytes)
56+
auto sizeHeaderPos = cmpDataPos;
57+
cmpDataPos += 8; // reserve space for cmp data size (8 bytes)
58+
59+
// begin compression
60+
auto cmpDataCap = cmpCap - 16 - conf.size_est() * 2;
61+
uint64_t cmpDataSize = 0;
6862
if (conf.N == 1) {
69-
cmpDataLen = SZ_compress_impl<T, 1>(conf, data, cmpDataPos, cmpDataCap);
63+
cmpDataSize = SZ_compress_impl<T, 1>(conf, data, cmpDataPos, cmpDataCap);
7064
} else if (conf.N == 2) {
71-
cmpDataLen = SZ_compress_impl<T, 2>(conf, data, cmpDataPos, cmpDataCap);
65+
cmpDataSize = SZ_compress_impl<T, 2>(conf, data, cmpDataPos, cmpDataCap);
7266
} else if (conf.N == 3) {
73-
cmpDataLen = SZ_compress_impl<T, 3>(conf, data, cmpDataPos, cmpDataCap);
67+
cmpDataSize = SZ_compress_impl<T, 3>(conf, data, cmpDataPos, cmpDataCap);
7468
} else if (conf.N == 4) {
75-
cmpDataLen = SZ_compress_impl<T, 4>(conf, data, cmpDataPos, cmpDataCap);
69+
cmpDataSize = SZ_compress_impl<T, 4>(conf, data, cmpDataPos, cmpDataCap);
7670
} else {
7771
throw std::invalid_argument("Data dimension higher than 4 is not supported.");
7872
}
7973

80-
auto cmpConfPos = reinterpret_cast<uchar *>(cmpData);
81-
auto confSize = conf.save(cmpConfPos);
82-
if (confSize > confEstSize) {
83-
throw std::length_error("buffer allocated for config is not large enough.");
84-
}
74+
// save compressed data size back in header
75+
write(cmpDataSize, sizeHeaderPos);
8576

86-
return confSize + cmpDataLen;
77+
// save config
78+
cmpDataPos += cmpDataSize;
79+
auto confSize = conf.save(cmpDataPos);
80+
81+
return 16 + cmpDataSize + confSize;
8782
}
8883

8984
/**
90-
* API for compression
91-
* @tparam T source data type
92-
* @param config config compression configuration
93-
* @param data source data
94-
* @param cmpSize compressed data size (in bytes)
95-
* @return compressed data, remember to 'delete []' when the data is no longer needed.
96-
*
97-
* Similar with SZ_compress(SZ3::Config &conf, const T *data, char *cmpData, size_t cmpCap)
98-
* The only difference is this one doesn't need the pre-allocated buffer (thus remember to do 'delete []' yourself)
85+
* Compresses the input data using the provided configuration and returns a newly allocated buffer containing the compressed data.
86+
* @tparam T The data type of the source data.
87+
* @param config The compression configuration.
88+
* @param data Pointer to the source data array.
89+
* @param cmpSize Output parameter set to the size of the compressed data in bytes.
90+
* @return Pointer to the newly allocated buffer containing the compressed data. The caller is responsible for deleting this buffer using 'delete[]'.
91+
* @note This function allocates memory for the compressed data. Make sure to release it with 'delete[]' when it is no longer needed.
9992
*/
10093
template <class T>
101-
char *SZ_compress(const SZ3::Config &config, const T *data, size_t &cmpSize) {
94+
char* SZ_compress(const SZ3::Config& config, const T* data, size_t& cmpSize) {
10295
using namespace SZ3;
10396

10497
size_t bufferLen = SZ_compress_size_bound<T>(config);
@@ -109,27 +102,29 @@ char *SZ_compress(const SZ3::Config &config, const T *data, size_t &cmpSize) {
109102
}
110103

111104
/**
112-
* API for decompression
113-
* @tparam T decompressed data type
114-
* @param config configuration placeholder. It will be overwritten by the compression configuration
115-
* @param cmpData compressed data
116-
* @param cmpSize compressed data size in bytes
117-
* @param decData pre-allocated buffer for decompressed data
118-
119-
example:
120-
auto decData = new float[100*200*300];
121-
SZ3::Config conf;
122-
SZ_decompress(conf, cmpData, cmpSize, decData);
123-
105+
* Decompresses the compressed data into a pre-allocated buffer using the configuration loaded from the compressed data.
106+
* @tparam T The data type of the decompressed data.
107+
* @param config Configuration placeholder that will be overwritten with the compression configuration from the compressed data.
108+
* @param cmpData Pointer to the compressed data.
109+
* @param cmpSize The size of the compressed data in bytes.
110+
* @param decData Reference to a pointer for the pre-allocated buffer for decompressed data. If null, a new buffer is allocated.
111+
* @example
112+
* auto decData = new float[100 * 200 * 300];
113+
* SZ3::Config conf;
114+
* SZ_decompress(conf, cmpData, cmpSize, decData);
124115
*/
125116
template <class T>
126-
void SZ_decompress(SZ3::Config &config, const char *cmpData, size_t cmpSize, T *&decData) {
117+
void SZ_decompress(SZ3::Config& config, const char* cmpData, size_t cmpSize, T*& decData) {
127118
using namespace SZ3;
128-
auto cmpConfPos = reinterpret_cast<const uchar *>(cmpData);
129-
config.load(cmpConfPos);
119+
120+
auto cmpDataPos = reinterpret_cast<const uchar*>(cmpData);
121+
122+
read(config.sz3MagicNumber, cmpDataPos);
130123
if (config.sz3MagicNumber != SZ3_MAGIC_NUMBER) {
131124
throw std::invalid_argument("magic number mismatch, the input data is not compressed by SZ3");
132125
}
126+
127+
read(config.sz3DataVer, cmpDataPos);
133128
if (versionStr(config.sz3DataVer) != SZ3_DATA_VER) {
134129
std::stringstream ss;
135130
printf("program v%s , program-data %s , input data v%s\n", SZ3_VER, SZ3_DATA_VER,
@@ -139,9 +134,11 @@ void SZ_decompress(SZ3::Config &config, const char *cmpData, size_t cmpSize, T *
139134
throw std::invalid_argument(ss.str());
140135
}
141136

137+
uint64_t cmpDataSize = 0;
138+
read(cmpDataSize, cmpDataPos);
142139

143-
auto cmpDataPos = reinterpret_cast<const uchar *>(cmpData) + config.size_est();
144-
auto cmpDataSize = cmpSize - config.size_est();
140+
auto cmpConfPos = cmpDataPos + cmpDataSize;
141+
config.load(cmpConfPos);
145142

146143
if (decData == nullptr) {
147144
decData = new T[config.num];
@@ -160,24 +157,21 @@ void SZ_decompress(SZ3::Config &config, const char *cmpData, size_t cmpSize, T *
160157
}
161158

162159
/**
163-
* API for decompression
164-
* Similar with SZ_decompress(SZ3::Config &config, char *cmpData, size_t cmpSize, T *&decData)
165-
* The only difference is this one doesn't need pre-allocated buffer for decompressed data
166-
*
167-
* @tparam T decompressed data type
168-
* @param config configuration placeholder. It will be overwritten by the compression configuration
169-
* @param cmpData compressed data
170-
* @param cmpSize compressed data size in bytes
171-
* @return decompressed data, remember to 'delete []' when the data is no longer needed.
172-
173-
example:
174-
SZ3::Config conf;
175-
float decompressedData = SZ_decompress(conf, cmpData, cmpSize)
160+
* Decompresses the compressed data into a newly allocated buffer using the configuration loaded from the compressed data.
161+
* @tparam T The data type of the decompressed data.
162+
* @param config Configuration placeholder that will be overwritten with the compression configuration from the compressed data.
163+
* @param cmpData Pointer to the compressed data.
164+
* @param cmpSize The size of the compressed data in bytes.
165+
* @return Pointer to the newly allocated buffer containing the decompressed data. The caller is responsible for deleting this buffer using 'delete[]'.
166+
* @example
167+
* SZ3::Config conf;
168+
* float* decData = SZ_decompress<float>(conf, cmpData, cmpSize);
169+
* delete[] decData; // release the buffer when it is no longer needed
176170
*/
177171
template <class T>
178-
T *SZ_decompress(SZ3::Config &config, const char *cmpData, size_t cmpSize) {
172+
T* SZ_decompress(SZ3::Config& config, const char* cmpData, size_t cmpSize) {
179173
using namespace SZ3;
180-
T *decData = nullptr;
174+
T* decData = nullptr;
181175
SZ_decompress<T>(config, cmpData, cmpSize, decData);
182176
return decData;
183177
}

0 commit comments

Comments
 (0)