@@ -13,7 +13,7 @@ const COMPRESSION_HEADER_SIZE: usize = 9;
13
13
/// Compression flag for uncompressed data
14
14
const FLAG_UNCOMPRESSED : u8 = 0x00 ;
15
15
16
- /// Compression flag for zstd compressed data
16
+ /// Compression flag for zstd compressed data
17
17
const FLAG_ZSTD : u8 = 0x01 ;
18
18
19
19
/// Default zstd compression level
@@ -24,13 +24,13 @@ const DEFAULT_ZSTD_LEVEL: i32 = 3;
24
24
pub enum CompressionError {
25
25
#[ error( "invalid compression header" ) ]
26
26
InvalidHeader ,
27
-
27
+
28
28
#[ error( "invalid compression flag: {0}" ) ]
29
29
InvalidCompressionFlag ( u8 ) ,
30
-
30
+
31
31
#[ error( "decompression failed: {0}" ) ]
32
32
DecompressionFailed ( String ) ,
33
-
33
+
34
34
#[ error( "zstd error: {0}" ) ]
35
35
ZstdError ( #[ from] io:: Error ) ,
36
36
}
@@ -66,24 +66,22 @@ impl BlobCompressor {
66
66
compression_level : DEFAULT_ZSTD_LEVEL ,
67
67
}
68
68
}
69
-
69
+
70
70
/// Create a new blob compressor with custom compression level
71
71
pub fn with_level ( compression_level : i32 ) -> Self {
72
- Self {
73
- compression_level,
74
- }
72
+ Self { compression_level }
75
73
}
76
-
74
+
77
75
/// Compress a blob
78
76
pub fn compress ( & self , blob : & [ u8 ] ) -> Result < Bytes > {
79
77
// For empty blobs, just add uncompressed header
80
78
if blob. is_empty ( ) {
81
79
return Ok ( self . add_compression_header ( blob, FLAG_UNCOMPRESSED , 0 ) ) ;
82
80
}
83
-
81
+
84
82
// Try to compress with zstd
85
83
let compressed = zstd:: encode_all ( blob, self . compression_level ) ?;
86
-
84
+
87
85
// Check if compression is beneficial (at least 10% savings)
88
86
let compression_ratio = compressed. len ( ) as f64 / blob. len ( ) as f64 ;
89
87
if compression_ratio > 0.9 {
@@ -94,42 +92,42 @@ impl BlobCompressor {
94
92
Ok ( self . add_compression_header ( & compressed, FLAG_ZSTD , blob. len ( ) as u64 ) )
95
93
}
96
94
}
97
-
95
+
98
96
/// Decompress a blob
99
97
pub fn decompress ( & self , compressed_blob : & [ u8 ] ) -> Result < Bytes > {
100
98
// Check if blob is too small to have a header
101
99
if compressed_blob. len ( ) < COMPRESSION_HEADER_SIZE {
102
100
// Assume legacy uncompressed blob
103
101
return Ok ( Bytes :: copy_from_slice ( compressed_blob) ) ;
104
102
}
105
-
103
+
106
104
// Check the compression flag
107
105
let flag = compressed_blob[ 0 ] ;
108
-
106
+
109
107
// Handle invalid flags with legacy blob heuristics
110
108
if flag != FLAG_UNCOMPRESSED && flag != FLAG_ZSTD {
111
109
// This could be either a legacy blob or a corrupted header
112
110
// Use heuristics to determine which
113
-
114
- let original_size = u64:: from_le_bytes (
115
- compressed_blob[ 1 ..9 ] . try_into ( ) . unwrap_or ( [ 0 ; 8 ] )
116
- ) ;
117
-
111
+
112
+ let original_size =
113
+ u64:: from_le_bytes ( compressed_blob[ 1 ..9 ] . try_into ( ) . unwrap_or ( [ 0 ; 8 ] ) ) ;
114
+
118
115
// If flag is in printable ASCII range (32-126) and size is unreasonable,
119
116
// it's likely a legacy text blob
120
- if ( flag >= 32 && flag <= 126 ) &&
121
- ( original_size == 0 || original_size > ( compressed_blob. len ( ) as u64 * 100 ) ) {
117
+ if ( 32 ..=126 ) . contains ( & flag)
118
+ && ( original_size == 0 || original_size > ( compressed_blob. len ( ) as u64 * 100 ) )
119
+ {
122
120
// Likely a legacy blob
123
121
return Ok ( Bytes :: copy_from_slice ( compressed_blob) ) ;
124
122
}
125
-
123
+
126
124
// Otherwise, it's likely a corrupted compressed blob
127
125
return Err ( CompressionError :: InvalidCompressionFlag ( flag) ) ;
128
126
}
129
-
127
+
130
128
// Parse the header
131
129
let ( flag, original_size, payload) = self . parse_compression_header ( compressed_blob) ?;
132
-
130
+
133
131
match flag {
134
132
FLAG_UNCOMPRESSED => {
135
133
// Data is uncompressed, just return the payload
@@ -139,15 +137,16 @@ impl BlobCompressor {
139
137
// Decompress with zstd
140
138
let decompressed = zstd:: decode_all ( payload)
141
139
. map_err ( |e| CompressionError :: DecompressionFailed ( e. to_string ( ) ) ) ?;
142
-
140
+
143
141
// Verify the decompressed size matches
144
142
if decompressed. len ( ) as u64 != original_size {
145
- return Err ( CompressionError :: DecompressionFailed (
146
- format ! ( "size mismatch: expected {}, got {}" ,
147
- original_size, decompressed. len( ) )
148
- ) ) ;
143
+ return Err ( CompressionError :: DecompressionFailed ( format ! (
144
+ "size mismatch: expected {}, got {}" ,
145
+ original_size,
146
+ decompressed. len( )
147
+ ) ) ) ;
149
148
}
150
-
149
+
151
150
Ok ( Bytes :: from ( decompressed) )
152
151
}
153
152
_ => {
@@ -156,7 +155,7 @@ impl BlobCompressor {
156
155
}
157
156
}
158
157
}
159
-
158
+
160
159
/// Get compression information about a blob
161
160
pub fn get_compression_info ( & self , blob : & [ u8 ] ) -> CompressionInfo {
162
161
if blob. len ( ) < COMPRESSION_HEADER_SIZE {
@@ -168,7 +167,7 @@ impl BlobCompressor {
168
167
compression_ratio : 1.0 ,
169
168
} ;
170
169
}
171
-
170
+
172
171
let flag = blob[ 0 ] ;
173
172
if flag != FLAG_UNCOMPRESSED && flag != FLAG_ZSTD {
174
173
// Legacy or invalid blob
@@ -180,14 +179,14 @@ impl BlobCompressor {
180
179
compression_ratio : 1.0 ,
181
180
} ;
182
181
}
183
-
182
+
184
183
if let Ok ( ( flag, original_size, _) ) = self . parse_compression_header ( blob) {
185
184
let algorithm = match flag {
186
185
FLAG_UNCOMPRESSED => "none" ,
187
186
FLAG_ZSTD => "zstd" ,
188
187
_ => "unknown" ,
189
188
} ;
190
-
189
+
191
190
CompressionInfo {
192
191
is_compressed : flag == FLAG_ZSTD ,
193
192
algorithm : algorithm. to_string ( ) ,
@@ -209,40 +208,42 @@ impl BlobCompressor {
209
208
}
210
209
}
211
210
}
212
-
211
+
213
212
/// Add compression header to payload
214
213
fn add_compression_header ( & self , payload : & [ u8 ] , flag : u8 , original_size : u64 ) -> Bytes {
215
214
let mut result = BytesMut :: with_capacity ( COMPRESSION_HEADER_SIZE + payload. len ( ) ) ;
216
-
215
+
217
216
// Write flag
218
217
result. extend_from_slice ( & [ flag] ) ;
219
-
218
+
220
219
// Write original size (little-endian)
221
220
result. extend_from_slice ( & original_size. to_le_bytes ( ) ) ;
222
-
221
+
223
222
// Write payload
224
223
result. extend_from_slice ( payload) ;
225
-
224
+
226
225
result. freeze ( )
227
226
}
228
-
227
+
229
228
/// Parse compression header from blob
230
229
fn parse_compression_header < ' a > ( & self , blob : & ' a [ u8 ] ) -> Result < ( u8 , u64 , & ' a [ u8 ] ) > {
231
230
if blob. len ( ) < COMPRESSION_HEADER_SIZE {
232
231
return Err ( CompressionError :: InvalidHeader ) ;
233
232
}
234
-
233
+
235
234
let flag = blob[ 0 ] ;
236
235
let original_size = u64:: from_le_bytes (
237
- blob[ 1 ..9 ] . try_into ( ) . map_err ( |_| CompressionError :: InvalidHeader ) ?
236
+ blob[ 1 ..9 ]
237
+ . try_into ( )
238
+ . map_err ( |_| CompressionError :: InvalidHeader ) ?,
238
239
) ;
239
240
let payload = & blob[ COMPRESSION_HEADER_SIZE ..] ;
240
-
241
+
241
242
// Validate the compression flag
242
243
if flag != FLAG_UNCOMPRESSED && flag != FLAG_ZSTD {
243
244
return Err ( CompressionError :: InvalidCompressionFlag ( flag) ) ;
244
245
}
245
-
246
+
246
247
Ok ( ( flag, original_size, payload) )
247
248
}
248
249
}
@@ -271,101 +272,101 @@ pub fn get_compression_info(blob: &[u8]) -> CompressionInfo {
271
272
#[ cfg( test) ]
272
273
mod tests {
273
274
use super :: * ;
274
-
275
+
275
276
#[ test]
276
277
fn test_compress_decompress_roundtrip ( ) {
277
278
let compressor = BlobCompressor :: new ( ) ;
278
-
279
+
279
280
// Test with compressible data
280
281
let original = b"hello world " . repeat ( 100 ) ;
281
282
let compressed = compressor. compress ( & original) . unwrap ( ) ;
282
283
let decompressed = compressor. decompress ( & compressed) . unwrap ( ) ;
283
-
284
+
284
285
assert_eq ! ( original, decompressed. as_ref( ) ) ;
285
-
286
+
286
287
// Verify it was actually compressed
287
288
let info = compressor. get_compression_info ( & compressed) ;
288
289
assert ! ( info. is_compressed) ;
289
290
assert_eq ! ( info. algorithm, "zstd" ) ;
290
291
assert ! ( info. compression_ratio < 0.5 ) ; // Should compress well
291
292
}
292
-
293
+
293
294
#[ test]
294
295
fn test_uncompressed_fallback ( ) {
295
296
let compressor = BlobCompressor :: new ( ) ;
296
-
297
+
297
298
// Random data that won't compress well
298
299
let mut random_data = vec ! [ 0u8 ; 100 ] ;
299
- for i in 0 .. 100 {
300
- random_data [ i ] = ( i * 7 + 13 ) as u8 ; // Pseudo-random
300
+ for ( i , item ) in random_data . iter_mut ( ) . enumerate ( ) . take ( 100 ) {
301
+ * item = ( i * 7 + 13 ) as u8 ; // Pseudo-random
301
302
}
302
-
303
+
303
304
let compressed = compressor. compress ( & random_data) . unwrap ( ) ;
304
305
let decompressed = compressor. decompress ( & compressed) . unwrap ( ) ;
305
-
306
+
306
307
assert_eq ! ( random_data, decompressed. as_ref( ) ) ;
307
-
308
+
308
309
// Verify it was stored uncompressed
309
310
let info = compressor. get_compression_info ( & compressed) ;
310
311
assert ! ( !info. is_compressed) ;
311
312
assert_eq ! ( info. algorithm, "none" ) ;
312
313
}
313
-
314
+
314
315
#[ test]
315
316
fn test_legacy_blob ( ) {
316
317
let compressor = BlobCompressor :: new ( ) ;
317
-
318
+
318
319
// Test with legacy blob (no compression header)
319
320
let legacy_blob = b"legacy data without header" ;
320
-
321
+
321
322
// Should return as-is
322
323
let decompressed = compressor. decompress ( legacy_blob) . unwrap ( ) ;
323
324
assert_eq ! ( legacy_blob, decompressed. as_ref( ) ) ;
324
325
}
325
-
326
+
326
327
#[ test]
327
328
fn test_invalid_compression_flag ( ) {
328
329
let compressor = BlobCompressor :: new ( ) ;
329
-
330
+
330
331
// Create blob with invalid flag
331
332
let mut invalid_blob = vec ! [ 0u8 ; COMPRESSION_HEADER_SIZE + 10 ] ;
332
333
invalid_blob[ 0 ] = 0xFF ; // Invalid flag
333
-
334
+
334
335
// Should return error
335
336
let result = compressor. decompress ( & invalid_blob) ;
336
337
assert ! ( result. is_err( ) ) ;
337
-
338
+
338
339
match result. unwrap_err ( ) {
339
340
CompressionError :: InvalidCompressionFlag ( flag) => {
340
341
assert_eq ! ( flag, 0xFF ) ;
341
342
}
342
343
_ => panic ! ( "Expected InvalidCompressionFlag error" ) ,
343
344
}
344
345
}
345
-
346
+
346
347
#[ test]
347
348
fn test_empty_blob ( ) {
348
349
let compressor = BlobCompressor :: new ( ) ;
349
-
350
+
350
351
let empty = vec ! [ ] ;
351
352
let compressed = compressor. compress ( & empty) . unwrap ( ) ;
352
353
let decompressed = compressor. decompress ( & compressed) . unwrap ( ) ;
353
-
354
+
354
355
assert_eq ! ( empty, decompressed. as_ref( ) ) ;
355
356
}
356
-
357
+
357
358
#[ test]
358
359
fn test_compression_info ( ) {
359
360
let compressor = BlobCompressor :: new ( ) ;
360
-
361
+
361
362
let original = b"compress me " . repeat ( 100 ) ;
362
363
let compressed = compressor. compress ( & original) . unwrap ( ) ;
363
-
364
+
364
365
let info = compressor. get_compression_info ( & compressed) ;
365
366
assert ! ( info. is_compressed) ;
366
367
assert_eq ! ( info. algorithm, "zstd" ) ;
367
368
assert_eq ! ( info. original_size, original. len( ) as u64 ) ;
368
369
assert ! ( info. compression_ratio < 1.0 ) ;
369
370
assert ! ( info. compression_ratio > 0.0 ) ;
370
371
}
371
- }
372
+ }
0 commit comments