1+ /* *
2+ * @file sz.hpp
3+ * @brief SZ3 compression and decompression API.
4+ *
5+ * This header provides the main API functions for compressing and decompressing data using SZ3.
6+ *
7+ * Compressed Data Format of SZ3:
8+ * The compressed data is always stored in little-endian order.
9+ * The compressed data consists of three main sections:
10+ * 1. Header (16 bytes): Contains metadata about the compressed data.
11+ * - Magic Number (4 bytes): Identifies the data as SZ3-compressed.
12+ * - Version (4 bytes): Indicates the version of the SZ3 format.
13+ * - Compressed Size (8 bytes): Specifies the size in bytes of the compressed payload only (the header and the trailing configuration are not counted).
14+ * 2. Compressed Payload: The actual compressed data.
15+ * 3. Configuration: Stores the compression configuration used.
16+ *
17+ * The layout can be visualized as follows:
18+ * [ Header (16 bytes) | Compressed Payload | Configuration ]
19+ */
20+
121#ifndef SZ3_SZ_HPP
222#define SZ3_SZ_HPP
323
424#include " SZ3/api/impl/SZImpl.hpp"
525#include " SZ3/version.hpp"
626
27+
728/* *
8- * API for compression
9- * @tparam T source data type
10- * @param config compression configuration. Please update the config with 1). data dimension and shape and 2). desired
11- settings.
12- * @param data source data
13- * @param cmpData pre-allocated buffer for compressed data
14- * @param cmpCap pre-allocated buffer size (in bytes) for compressed data
15- * @return compressed data size (in bytes)
16-
17- The compression algorithms are:
18- ALGO_INTERP_LORENZO:
19- The default algorithm in SZ3. It is the implementation of our ICDE'21 paper.
20- The whole dataset will be compressed by interpolation or lorenzo predictor with auto-optimized settings.
21- ALGO_INTERP:
22- The whole dataset will be compressed by interpolation predictor with default settings.
23- ALGO_LORENZO_REG:
24- The whole dataset will be compressed by lorenzo and/or regression based predictors block by block with default
25- settings. The four predictors ( 1st-order lorenzo, 2nd-order lorenzo, 1st-order regression, 2nd-order regression) can be
26- enabled or disabled independently by conf settings (lorenzo, lorenzo2, regression, regression2).
27-
28- Interpolation+lorenzo example:
29- SZ3::Config conf(100, 200, 300); // 300 is the fastest dimension
30- conf.cmprAlgo = SZ3::ALGO_INTERP_LORENZO;
31- conf.errorBoundMode = SZ3::EB_ABS; // refer to def.hpp for all supported error bound mode
32- conf.absErrorBound = 1E-3; // absolute error bound 1e-3
33- char *compressedData = SZ_compress(conf, data, outSize);
34-
35- Interpolation example:
36- SZ3::Config conf(100, 200, 300); // 300 is the fastest dimension
37- conf.cmprAlgo = SZ3::ALGO_INTERP;
38- conf.errorBoundMode = SZ3::EB_REL; // refer to def.hpp for all supported error bound mode
39- conf.relErrorBound = 1E-3; // value-rang-based error bound 1e-3
40- char *compressedData = SZ_compress(conf, data, outSize);
41-
42- Lorenzo/regression example :
43- SZ3::Config conf(100, 200, 300); // 300 is the fastest dimension
44- conf.cmprAlgo = SZ3::ALGO_LORENZO_REG;
45- conf.lorenzo = true; // only use 1st order lorenzo
46- conf.lorenzo2 = false;
47- conf.regression = false;
48- conf.regression2 = false;
49- conf.errorBoundMode = SZ3::EB_ABS; // refer to def.hpp for all supported error bound mode
50- conf.absErrorBound = 1E-3; // absolute error bound 1e-3
51- char *compressedData = SZ_compress(conf, data, outSize);
29+ * Compresses the input data using the provided configuration and stores the result in a pre-allocated buffer.
30+ * @tparam T The data type of the source data.
31+ * @param config The compression configuration.
32+ * @param data Pointer to the source data array.
33+ * @param cmpData Pointer to the pre-allocated buffer for compressed data.
34+ * @param cmpCap The size of the pre-allocated buffer in bytes. It must be at least SZ_compress_size_bound<T>(config); otherwise std::invalid_argument is thrown.
35+ * @return The size of the compressed data in bytes.
36+ * @example
37+ * SZ3::Config conf(100, 200, 300); // 300 is the fastest dimension
38+ * conf.errorBoundMode = SZ3::EB_ABS; // Refer to def.hpp for supported error bound modes
39+ * conf.absErrorBound = 1E-3; // Absolute error bound of 1e-3
40+ * size_t outSize = SZ_compress(conf, data, outBuff, outBuffCap);
5241 */
5342template <class T >
54- size_t SZ_compress (const SZ3::Config & config, const T * data, char * cmpData, size_t cmpCap) {
43+ size_t SZ_compress (const SZ3::Config& config, const T* data, char * cmpData, size_t cmpCap) {
5544 using namespace SZ3 ;
5645 Config conf (config);
5746
5847 if (cmpCap < SZ_compress_size_bound<T>(conf)) {
5948 throw std::invalid_argument (SZ3_ERROR_COMP_BUFFER_NOT_LARGE_ENOUGH);
6049 }
6150
62- auto confEstSize = conf.size_est ();
63- auto cmpDataPos = reinterpret_cast <uchar *>(cmpData) + confEstSize;
64- memset (cmpData, 0 , confEstSize);
65- auto cmpDataCap = cmpCap - conf.size_est ();
51+ auto cmpDataPos = reinterpret_cast <uchar*>(cmpData);
6652
67- size_t cmpDataLen = 0 ;
53+ // save 16 bytes header
54+ write (config.sz3MagicNumber , cmpDataPos); // magic number (4 bytes)
55+ write (config.sz3DataVer , cmpDataPos); // data version (4 bytes)
56+ auto sizeHeaderPos = cmpDataPos;
57+ cmpDataPos += 8 ; // reserve space for cmp data size (8 bytes)
58+
59+ // begin compression
60+ auto cmpDataCap = cmpCap - 16 - conf.size_est () * 2 ;
61+ uint64_t cmpDataSize = 0 ;
6862 if (conf.N == 1 ) {
69- cmpDataLen = SZ_compress_impl<T, 1 >(conf, data, cmpDataPos, cmpDataCap);
63+ cmpDataSize = SZ_compress_impl<T, 1 >(conf, data, cmpDataPos, cmpDataCap);
7064 } else if (conf.N == 2 ) {
71- cmpDataLen = SZ_compress_impl<T, 2 >(conf, data, cmpDataPos, cmpDataCap);
65+ cmpDataSize = SZ_compress_impl<T, 2 >(conf, data, cmpDataPos, cmpDataCap);
7266 } else if (conf.N == 3 ) {
73- cmpDataLen = SZ_compress_impl<T, 3 >(conf, data, cmpDataPos, cmpDataCap);
67+ cmpDataSize = SZ_compress_impl<T, 3 >(conf, data, cmpDataPos, cmpDataCap);
7468 } else if (conf.N == 4 ) {
75- cmpDataLen = SZ_compress_impl<T, 4 >(conf, data, cmpDataPos, cmpDataCap);
69+ cmpDataSize = SZ_compress_impl<T, 4 >(conf, data, cmpDataPos, cmpDataCap);
7670 } else {
7771 throw std::invalid_argument (" Data dimension higher than 4 is not supported." );
7872 }
7973
80- auto cmpConfPos = reinterpret_cast <uchar *>(cmpData);
81- auto confSize = conf.save (cmpConfPos);
82- if (confSize > confEstSize) {
83- throw std::length_error (" buffer allocated for config is not large enough." );
84- }
74+ // save compressed data size back in header
75+ write (cmpDataSize, sizeHeaderPos);
8576
86- return confSize + cmpDataLen;
77+ // save config
78+ cmpDataPos += cmpDataSize;
79+ auto confSize = conf.save (cmpDataPos);
80+
81+ return 16 + cmpDataSize + confSize;
8782}
8883
8984/* *
90- * API for compression
91- * @tparam T source data type
92- * @param config config compression configuration
93- * @param data source data
94- * @param cmpSize compressed data size (in bytes)
95- * @return compressed data, remember to 'delete []' when the data is no longer needed.
96- *
97- * Similar with SZ_compress(SZ3::Config &conf, const T *data, char *cmpData, size_t cmpCap)
98- * The only difference is this one doesn't need the pre-allocated buffer (thus remember to do 'delete []' yourself)
85+ * Compresses the input data using the provided configuration and returns a newly allocated buffer containing the compressed data.
86+ * @tparam T The data type of the source data.
87+ * @param config The compression configuration.
88+ * @param data Pointer to the source data array.
89+ * @param cmpSize Output parameter set to the size of the compressed data in bytes.
90+ * @return Pointer to the newly allocated buffer containing the compressed data. The caller is responsible for deleting this buffer using 'delete[]'.
91+ * @note This function allocates memory for the compressed data. Ensure to free it when no longer needed.
9992 */
10093template <class T >
101- char * SZ_compress (const SZ3::Config & config, const T * data, size_t & cmpSize) {
94+ char * SZ_compress (const SZ3::Config& config, const T* data, size_t & cmpSize) {
10295 using namespace SZ3 ;
10396
10497 size_t bufferLen = SZ_compress_size_bound<T>(config);
@@ -109,27 +102,29 @@ char *SZ_compress(const SZ3::Config &config, const T *data, size_t &cmpSize) {
109102}
110103
111104/* *
112- * API for decompression
113- * @tparam T decompressed data type
114- * @param config configuration placeholder. It will be overwritten by the compression configuration
115- * @param cmpData compressed data
116- * @param cmpSize compressed data size in bytes
117- * @param decData pre-allocated buffer for decompressed data
118-
119- example:
120- auto decData = new float[100*200*300];
121- SZ3::Config conf;
122- SZ_decompress(conf, cmpData, cmpSize, decData);
123-
105+ * Decompresses the compressed data into a pre-allocated buffer using the configuration loaded from the compressed data.
106+ * @tparam T The data type of the decompressed data.
107+ * @param config Configuration placeholder that will be overwritten with the compression configuration from the compressed data.
108+ * @param cmpData Pointer to the compressed data.
109+ * @param cmpSize The size of the compressed data in bytes.
110+ * @param decData Reference to a pointer to the pre-allocated buffer for decompressed data. If null, a new buffer of config.num elements is allocated with 'new[]' and the caller is responsible for releasing it with 'delete[]'.
111+ * @example
112+ * auto decData = new float[100 * 200 * 300];
113+ * SZ3::Config conf;
114+ * SZ_decompress(conf, cmpData, cmpSize, decData);
124115 */
125116template <class T >
126- void SZ_decompress (SZ3::Config & config, const char * cmpData, size_t cmpSize, T *& decData) {
117+ void SZ_decompress (SZ3::Config& config, const char * cmpData, size_t cmpSize, T*& decData) {
127118 using namespace SZ3 ;
128- auto cmpConfPos = reinterpret_cast <const uchar *>(cmpData);
129- config.load (cmpConfPos);
119+
120+ auto cmpDataPos = reinterpret_cast <const uchar*>(cmpData);
121+
122+ read (config.sz3MagicNumber , cmpDataPos);
130123 if (config.sz3MagicNumber != SZ3_MAGIC_NUMBER) {
131124 throw std::invalid_argument (" magic number mismatch, the input data is not compressed by SZ3" );
132125 }
126+
127+ read (config.sz3DataVer , cmpDataPos);
133128 if (versionStr (config.sz3DataVer ) != SZ3_DATA_VER) {
134129 std::stringstream ss;
135130 printf (" program v%s , program-data %s , input data v%s\n " , SZ3_VER, SZ3_DATA_VER,
@@ -139,9 +134,11 @@ void SZ_decompress(SZ3::Config &config, const char *cmpData, size_t cmpSize, T *
139134 throw std::invalid_argument (ss.str ());
140135 }
141136
137+ uint64_t cmpDataSize = 0 ;
138+ read (cmpDataSize, cmpDataPos);
142139
143- auto cmpDataPos = reinterpret_cast < const uchar *>(cmpData) + config. size_est () ;
144- auto cmpDataSize = cmpSize - config.size_est ( );
140+ auto cmpConfPos = cmpDataPos + cmpDataSize ;
141+ config.load (cmpConfPos );
145142
146143 if (decData == nullptr ) {
147144 decData = new T[config.num ];
@@ -160,24 +157,21 @@ void SZ_decompress(SZ3::Config &config, const char *cmpData, size_t cmpSize, T *
160157}
161158
162159/* *
163- * API for decompression
164- * Similar with SZ_decompress(SZ3::Config &config, char *cmpData, size_t cmpSize, T *&decData)
165- * The only difference is this one doesn't need pre-allocated buffer for decompressed data
166- *
167- * @tparam T decompressed data type
168- * @param config configuration placeholder. It will be overwritten by the compression configuration
169- * @param cmpData compressed data
170- * @param cmpSize compressed data size in bytes
171- * @return decompressed data, remember to 'delete []' when the data is no longer needed.
172-
173- example:
174- SZ3::Config conf;
175- float decompressedData = SZ_decompress(conf, cmpData, cmpSize)
160+ * Decompresses the compressed data into a newly allocated buffer using the configuration loaded from the compressed data.
161+ * @tparam T The data type of the decompressed data.
162+ * @param config Configuration placeholder that will be overwritten with the compression configuration from the compressed data.
163+ * @param cmpData Pointer to the compressed data.
164+ * @param cmpSize The size of the compressed data in bytes.
165+ * @return Pointer to the newly allocated buffer containing the decompressed data. The caller is responsible for deleting this buffer using 'delete[]'.
166+ * @example
167+ * SZ3::Config conf;
168+ * float* decData = SZ_decompress<float>(conf, cmpData, cmpSize);
169+ * delete[] decData; // release the buffer when it is no longer needed
176170 */
177171template <class T >
178- T * SZ_decompress (SZ3::Config & config, const char * cmpData, size_t cmpSize) {
172+ T* SZ_decompress (SZ3::Config& config, const char * cmpData, size_t cmpSize) {
179173 using namespace SZ3 ;
180- T * decData = nullptr ;
174+ T* decData = nullptr ;
181175 SZ_decompress<T>(config, cmpData, cmpSize, decData);
182176 return decData;
183177}
0 commit comments