|
21 | 21 |
|
22 | 22 | #include <zlib.h> |
23 | 23 |
|
24 | | -#include <array> |
25 | 24 | #include <format> |
26 | 25 | #include <string> |
27 | 26 |
|
@@ -155,40 +154,68 @@ Result<MetadataFileCodecType> TableMetadataUtil::CodecFromFileName( |
155 | 154 | return MetadataFileCodecType::kNone; |
156 | 155 | } |
157 | 156 |
|
158 | | -Result<std::string> DecompressGZIPFile(const std::string& filepath) { |
159 | | - gzFile file = gzopen(filepath.c_str(), "rb"); |
160 | | - if (!file) { |
161 | | - return IOError("Failed to open gzip file:{} ", filepath); |
162 | | - } |
| 157 | +class GZipDecompressor { |
| 158 | + public: |
| 159 | + GZipDecompressor() { memset(&stream_, 0, sizeof(stream_)); } |
163 | 160 |
|
164 | | - static const int CHUNK_SIZE = 32768; // 32KB chunks |
165 | | - std::array<char, CHUNK_SIZE> buffer; |
166 | | - std::string result; |
167 | | - int bytes_read; |
| 161 | + ~GZipDecompressor() { |
| 162 | + if (initialized_) { |
| 163 | + inflateEnd(&stream_); |
| 164 | + } |
| 165 | + } |
168 | 166 |
|
169 | | - while ((bytes_read = gzread(file, buffer.data(), CHUNK_SIZE)) > 0) { |
170 | | - result.append(buffer.data(), bytes_read); |
| 167 | + Status Init() { |
| 168 | + int ret = inflateInit2(&stream_, 15 + 32); |
| 169 | + if (ret != Z_OK) { |
| 170 | + return IOError("inflateInit2 failed, result:{}", ret); |
| 171 | + } |
| 172 | + initialized_ = true; |
| 173 | + return {}; |
171 | 174 | } |
172 | 175 |
|
173 | | - int err; |
174 | | - const char* error_msg = gzerror(file, &err); |
175 | | - if (err != Z_OK) { |
176 | | - gzclose(file); |
177 | | - return IOError("Error during gzip decompression:{} ", std::string(error_msg)); |
| 176 | + Result<std::string> Decompress(const std::string& compressed_data) { |
| 177 | + if (compressed_data.empty()) { |
| 178 | + return {}; |
| 179 | + } |
| 180 | + if (!initialized_) { |
| 181 | + ICEBERG_RETURN_UNEXPECTED(Init()); |
| 182 | + } |
| 183 | + stream_.avail_in = static_cast<uInt>(compressed_data.size()); |
| 184 | + stream_.next_in = reinterpret_cast<Bytef*>(const_cast<char*>(compressed_data.data())); |
| 185 | + |
| 186 | + // TODO(xiao.dong) magic buffer, can we get a estimated size from compressed data? |
| 187 | + std::vector<char> outBuffer(32 * 1024); |
| 188 | + std::string result; |
| 189 | + int ret = 0; |
| 190 | + do { |
| 191 | + outBuffer.resize(outBuffer.size()); |
| 192 | + stream_.avail_out = static_cast<uInt>(outBuffer.size()); |
| 193 | + stream_.next_out = reinterpret_cast<Bytef*>(outBuffer.data()); |
| 194 | + ret = inflate(&stream_, Z_NO_FLUSH); |
| 195 | + if (ret != Z_OK && ret != Z_STREAM_END) { |
| 196 | + return IOError("inflate failed, result:{}", ret); |
| 197 | + } |
| 198 | + result.append(outBuffer.data(), outBuffer.size() - stream_.avail_out); |
| 199 | + } while (ret != Z_STREAM_END); |
| 200 | + return result; |
178 | 201 | } |
179 | 202 |
|
180 | | - gzclose(file); |
181 | | - return result; |
182 | | -} |
| 203 | + private: |
| 204 | + bool initialized_ = false; |
| 205 | + z_stream stream_; |
| 206 | +}; |
183 | 207 |
|
184 | 208 | Result<std::unique_ptr<TableMetadata>> TableMetadataUtil::Read( |
185 | 209 | FileIO& io, const std::string& location, std::optional<size_t> length) { |
186 | 210 | ICEBERG_ASSIGN_OR_RAISE(auto codec_type, CodecFromFileName(location)); |
187 | | - std::string content; |
| 211 | + |
| 212 | + ICEBERG_ASSIGN_OR_RAISE(auto content, io.ReadFile(location, length)); |
188 | 213 | if (codec_type == MetadataFileCodecType::kGzip) { |
189 | | - ICEBERG_ASSIGN_OR_RAISE(content, DecompressGZIPFile(location)); |
190 | | - } else { |
191 | | - ICEBERG_ASSIGN_OR_RAISE(content, io.ReadFile(location, length)); |
| 214 | + auto gzip_decompressor = std::make_unique<GZipDecompressor>(); |
| 215 | + ICEBERG_RETURN_UNEXPECTED(gzip_decompressor->Init()); |
| 216 | + auto result = gzip_decompressor->Decompress(content); |
| 217 | + ICEBERG_RETURN_UNEXPECTED(result); |
| 218 | + content = result.value(); |
192 | 219 | } |
193 | 220 |
|
194 | 221 | ICEBERG_ASSIGN_OR_RAISE(auto json, FromJsonString(content)); |
|
0 commit comments