diff --git a/README.md b/README.md index fd7b406..658384e 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,20 @@ Fork from https://github.com/jarredholman/ruby-zstd. ## Zstd version [v1.5.7](https://github.com/facebook/zstd/tree/v1.5.7) +## Versioning Policy + +Starting from v2.0.0, this gem follows Semantic Versioning. + +- **Major version** (X.0.0): Breaking changes to the API +- **Minor version** (X.Y.0): New features, including Zstd library version updates +- **Patch version** (X.Y.Z): Bug fixes and other backward-compatible changes + +### Zstd Library Updates + +Updates to the underlying Zstd library version will be released as **minor version** updates, as they may introduce new features or performance improvements while maintaining backward compatibility. + +**Note**: Versions prior to v2.0.0 followed the Zstd library versioning scheme with an additional patch number (e.g., 1.5.6.2). This approach has been replaced with semantic versioning to provide clearer expectations for API stability. + ## Installation Add this line to your application's Gemfile: @@ -155,6 +169,20 @@ result << stream.decompress(cstr[10..-1]) DDict can also be specified to `dict:`. +#### Streaming Decompression with Position Tracking + +If you need to know how much of the input data was consumed during decompression, you can use the `decompress_with_pos` method: + +```ruby +cstr = "" # Compressed data +stream = Zstd::StreamingDecompress.new +result, consumed_bytes = stream.decompress_with_pos(cstr[0, 10]) +# result contains the decompressed data +# consumed_bytes contains the number of bytes from input that were processed +``` + +This is particularly useful when processing streaming data where you need to track the exact position in the input stream. + ### Skippable frame ```ruby diff --git a/benchmarks/Gemfile.lock b/benchmarks/Gemfile.lock index 368439f..ceeecb2 100644 --- a/benchmarks/Gemfile.lock +++ b/benchmarks/Gemfile.lock @@ -16,4 +16,4 @@ DEPENDENCIES snappy BUNDLED WITH - 2.5.7 + 2.5.9 diff --git a/examples/sinatra/Gemfile.lock b/examples/sinatra/Gemfile.lock index 7f1fa19..745bd37 100644 --- a/examples/sinatra/Gemfile.lock +++ b/examples/sinatra/Gemfile.lock @@ -1,32 +1,34 @@ PATH remote: ../.. specs: - zstd-ruby (1.5.6.2) + zstd-ruby (2.0.0.pre.preview1) GEM remote: https://rubygems.org/ specs: - base64 (0.2.0) - mustermann (3.0.0) + base64 (0.3.0) + logger (1.7.0) + mustermann (3.0.3) ruby2_keywords (~> 0.0.1) - rack (3.0.10) - rack-protection (4.0.0) + rack (3.2.0) + rack-protection (4.1.1) base64 (>= 0.1.0) + logger (>= 1.6.0) rack (>= 3.0.0, < 4) - rack-session (2.0.0) + rack-session (2.1.1) + base64 (>= 0.1.0) rack (>= 3.0.0) - rackup (2.1.0) + rackup (2.2.1) rack (>= 3) - webrick (~> 1.8) ruby2_keywords (0.0.5) - sinatra (4.0.0) + sinatra (4.1.1) + logger (>= 1.6.0) mustermann (~> 3.0) rack (>= 3.0.0, < 4) - rack-protection (= 4.0.0) + rack-protection (= 4.1.1) rack-session (>= 2.0.0, < 3) tilt (~> 2.0) - tilt (2.3.0) - webrick (1.8.1) + tilt (2.6.1) PLATFORMS arm64-darwin-21 @@ -38,4 +40,4 @@ DEPENDENCIES zstd-ruby! BUNDLED WITH - 2.5.7 + 2.5.9 diff --git a/ext/zstdruby/common.h b/ext/zstdruby/common.h index aeadc7f..70e792e 100644 --- a/ext/zstdruby/common.h +++ b/ext/zstdruby/common.h @@ -18,7 +18,7 @@ static int convert_compression_level(VALUE compression_level_value) return NUM2INT(compression_level_value); } -static void set_compress_params(ZSTD_CCtx* const ctx, VALUE level_from_args, VALUE kwargs) +static void set_compress_params(ZSTD_CCtx* const ctx, VALUE kwargs) { ID kwargs_keys[2]; kwargs_keys[0] = rb_intern("level"); @@ -29,9 +29,6 @@ static void set_compress_params(ZSTD_CCtx* const ctx, VALUE level_from_args, VAL int compression_level = ZSTD_CLEVEL_DEFAULT; if (kwargs_values[0] != Qundef && kwargs_values[0] != Qnil) { compression_level = convert_compression_level(kwargs_values[0]); - } else if (!NIL_P(level_from_args)) { - rb_warn("`level` in args is deprecated; use keyword args `level:` instead."); - compression_level = convert_compression_level(level_from_args); } ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, compression_level); diff --git a/ext/zstdruby/streaming_compress.c b/ext/zstdruby/streaming_compress.c index af01a3a..8614d47 100644 --- a/ext/zstdruby/streaming_compress.c +++ b/ext/zstdruby/streaming_compress.c @@ -71,8 +71,7 @@ static VALUE rb_streaming_compress_initialize(int argc, VALUE *argv, VALUE obj) { VALUE kwargs; - VALUE compression_level_value; - rb_scan_args(argc, argv, "01:", &compression_level_value, &kwargs); + rb_scan_args(argc, argv, "00:", &kwargs); struct streaming_compress_t* sc; TypedData_Get_Struct(obj, struct streaming_compress_t, &streaming_compress_type, sc); @@ -82,7 +81,7 @@ rb_streaming_compress_initialize(int argc, VALUE *argv, VALUE obj) if (ctx == NULL) { rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx error"); } - set_compress_params(ctx, compression_level_value, kwargs); + set_compress_params(ctx, kwargs); sc->ctx = ctx; sc->buf = rb_str_new(NULL, buffOutSize); diff --git a/ext/zstdruby/streaming_decompress.c b/ext/zstdruby/streaming_decompress.c index 8ae54f2..0336967 100644 --- a/ext/zstdruby/streaming_decompress.c +++ b/ext/zstdruby/streaming_decompress.c @@ -113,6 +113,27 @@ rb_streaming_decompress_decompress(VALUE obj, VALUE src) return result; } +static VALUE +rb_streaming_decompress_decompress_with_pos(VALUE obj, VALUE src) +{ + StringValue(src); + const char* input_data = RSTRING_PTR(src); + size_t input_size = RSTRING_LEN(src); + ZSTD_inBuffer input = { input_data, input_size, 0 }; + + struct streaming_decompress_t* sd; + TypedData_Get_Struct(obj, struct streaming_decompress_t, &streaming_decompress_type, sd); + const char* output_data = RSTRING_PTR(sd->buf); + VALUE result = rb_str_new(0, 0); + ZSTD_outBuffer output = { (void*)output_data, sd->buf_size, 0 }; + size_t const ret = zstd_stream_decompress(sd->dctx, &output, &input, false); + if (ZSTD_isError(ret)) { + rb_raise(rb_eRuntimeError, "decompress error error code: %s", ZSTD_getErrorName(ret)); + } + rb_str_cat(result, output.dst, output.pos); + return rb_ary_new_from_args(2, result, ULONG2NUM(input.pos)); +} + extern VALUE rb_mZstd, cStreamingDecompress; void zstd_ruby_streaming_decompress_init(void) @@ -121,4 +142,5 @@ zstd_ruby_streaming_decompress_init(void) rb_define_alloc_func(cStreamingDecompress, rb_streaming_decompress_allocate); rb_define_method(cStreamingDecompress, "initialize", rb_streaming_decompress_initialize, -1); rb_define_method(cStreamingDecompress, "decompress", rb_streaming_decompress_decompress, 1); + rb_define_method(cStreamingDecompress, "decompress_with_pos", rb_streaming_decompress_decompress_with_pos, 1); } diff --git a/ext/zstdruby/zstdruby.c b/ext/zstdruby/zstdruby.c index fd95d9a..e63b95c 100644 --- a/ext/zstdruby/zstdruby.c +++ b/ext/zstdruby/zstdruby.c @@ -11,16 +11,15 @@ static VALUE zstdVersion(VALUE self) static VALUE rb_compress(int argc, VALUE *argv, VALUE self) { VALUE input_value; - VALUE compression_level_value; VALUE kwargs; - rb_scan_args(argc, argv, "11:", &input_value, &compression_level_value, &kwargs); + rb_scan_args(argc, argv, "10:", &input_value, &kwargs); ZSTD_CCtx* const ctx = ZSTD_createCCtx(); if (ctx == NULL) { rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx error"); } - set_compress_params(ctx, compression_level_value, kwargs); + set_compress_params(ctx, kwargs); StringValue(input_value); char* input_data = RSTRING_PTR(input_value); @@ -40,51 +39,6 @@ static VALUE rb_compress(int argc, VALUE *argv, VALUE self) return output; } -static VALUE rb_compress_using_dict(int argc, VALUE *argv, VALUE self) -{ - rb_warn("Zstd.compress_using_dict is deprecated; use Zstd.compress with `dict:` instead."); - VALUE input_value; - VALUE dict; - VALUE compression_level_value; - rb_scan_args(argc, argv, "21", &input_value, &dict, &compression_level_value); - int compression_level = convert_compression_level(compression_level_value); - - StringValue(input_value); - char* input_data = RSTRING_PTR(input_value); - size_t input_size = RSTRING_LEN(input_value); - size_t max_compressed_size = ZSTD_compressBound(input_size); - - char* dict_buffer = RSTRING_PTR(dict); - size_t dict_size = RSTRING_LEN(dict); - - ZSTD_CDict* const cdict = ZSTD_createCDict(dict_buffer, dict_size, compression_level); - if (cdict == NULL) { - rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCDict failed"); - } - ZSTD_CCtx* const ctx = ZSTD_createCCtx(); - if (ctx == NULL) { - ZSTD_freeCDict(cdict); - rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx failed"); - } - - VALUE output = rb_str_new(NULL, max_compressed_size); - char* output_data = RSTRING_PTR(output); - size_t const compressed_size = ZSTD_compress_usingCDict(ctx, (void*)output_data, max_compressed_size, - (void*)input_data, input_size, cdict); - - if (ZSTD_isError(compressed_size)) { - ZSTD_freeCDict(cdict); - ZSTD_freeCCtx(ctx); - rb_raise(rb_eRuntimeError, "%s: %s", "compress failed", ZSTD_getErrorName(compressed_size)); - } - - rb_str_resize(output, compressed_size); - ZSTD_freeCDict(cdict); - ZSTD_freeCCtx(ctx); - return output; -} - - static VALUE decompress_buffered(ZSTD_DCtx* dctx, const char* input_data, size_t input_size) { ZSTD_inBuffer input = { input_data, input_size, 0 }; @@ -142,59 +96,6 @@ static VALUE rb_decompress(int argc, VALUE *argv, VALUE self) return output; } -static VALUE rb_decompress_using_dict(int argc, VALUE *argv, VALUE self) -{ - rb_warn("Zstd.decompress_using_dict is deprecated; use Zstd.decompress with `dict:` instead."); - VALUE input_value; - VALUE dict; - rb_scan_args(argc, argv, "20", &input_value, &dict); - - StringValue(input_value); - char* input_data = RSTRING_PTR(input_value); - size_t input_size = RSTRING_LEN(input_value); - - char* dict_buffer = RSTRING_PTR(dict); - size_t dict_size = RSTRING_LEN(dict); - ZSTD_DDict* const ddict = ZSTD_createDDict(dict_buffer, dict_size); - if (ddict == NULL) { - rb_raise(rb_eRuntimeError, "%s", "ZSTD_createDDict failed"); - } - unsigned const expected_dict_id = ZSTD_getDictID_fromDDict(ddict); - unsigned const actual_dict_id = ZSTD_getDictID_fromFrame(input_data, input_size); - if (expected_dict_id != actual_dict_id) { - ZSTD_freeDDict(ddict); - rb_raise(rb_eRuntimeError, "DictID mismatch"); - } - - ZSTD_DCtx* const ctx = ZSTD_createDCtx(); - if (ctx == NULL) { - ZSTD_freeDDict(ddict); - rb_raise(rb_eRuntimeError, "%s", "ZSTD_createDCtx failed"); - } - - unsigned long long const uncompressed_size = ZSTD_getFrameContentSize(input_data, input_size); - if (uncompressed_size == ZSTD_CONTENTSIZE_ERROR) { - ZSTD_freeDDict(ddict); - ZSTD_freeDCtx(ctx); - rb_raise(rb_eRuntimeError, "%s: %s", "not compressed by zstd", ZSTD_getErrorName(uncompressed_size)); - } - if (uncompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) { - return decompress_buffered(ctx, input_data, input_size); - } - - VALUE output = rb_str_new(NULL, uncompressed_size); - char* output_data = RSTRING_PTR(output); - size_t const decompress_size = ZSTD_decompress_usingDDict(ctx, output_data, uncompressed_size, input_data, input_size, ddict); - if (ZSTD_isError(decompress_size)) { - ZSTD_freeDDict(ddict); - ZSTD_freeDCtx(ctx); - rb_raise(rb_eRuntimeError, "%s: %s", "decompress error", ZSTD_getErrorName(decompress_size)); - } - ZSTD_freeDDict(ddict); - ZSTD_freeDCtx(ctx); - return output; -} - static void free_cdict(void *dict) { ZSTD_freeCDict(dict); @@ -284,9 +185,7 @@ zstd_ruby_init(void) { rb_define_module_function(rb_mZstd, "zstd_version", zstdVersion, 0); rb_define_module_function(rb_mZstd, "compress", rb_compress, -1); - rb_define_module_function(rb_mZstd, "compress_using_dict", rb_compress_using_dict, -1); rb_define_module_function(rb_mZstd, "decompress", rb_decompress, -1); - rb_define_module_function(rb_mZstd, "decompress_using_dict", rb_decompress_using_dict, -1); rb_define_alloc_func(rb_cCDict, rb_cdict_alloc); rb_define_private_method(rb_cCDict, "initialize", rb_cdict_initialize, -1); diff --git a/lib/zstd-ruby/version.rb b/lib/zstd-ruby/version.rb index c588151..94cd93d 100644 --- a/lib/zstd-ruby/version.rb +++ b/lib/zstd-ruby/version.rb @@ -1,3 +1,3 @@ module Zstd - VERSION = "1.5.7.0" + VERSION = "2.0.0-preview1" end diff --git a/spec/zstd-ruby-streaming-decompress_spec.rb b/spec/zstd-ruby-streaming-decompress_spec.rb index 6eb0534..de25b47 100644 --- a/spec/zstd-ruby-streaming-decompress_spec.rb +++ b/spec/zstd-ruby-streaming-decompress_spec.rb @@ -17,6 +17,71 @@ end end + describe 'decompress_with_pos' do + it 'should return decompressed data and consumed input position' do + str = "hello world test data" + cstr = Zstd.compress(str) + stream = Zstd::StreamingDecompress.new + + # Test with partial input + result_array = stream.decompress_with_pos(cstr[0, 10]) + expect(result_array).to be_an(Array) + expect(result_array.length).to eq(2) + + decompressed_data = result_array[0] + consumed_bytes = result_array[1] + + expect(decompressed_data).to be_a(String) + expect(consumed_bytes).to be_a(Integer) + expect(consumed_bytes).to be > 0 + expect(consumed_bytes).to be <= 10 + end + + it 'should work with complete compressed data' do + str = "foo bar buzz" + cstr = Zstd.compress(str) + stream = Zstd::StreamingDecompress.new + + result_array = stream.decompress_with_pos(cstr) + decompressed_data = result_array[0] + consumed_bytes = result_array[1] + + expect(decompressed_data).to eq(str) + expect(consumed_bytes).to eq(cstr.length) + end + + it 'should work with multiple calls' do + str = "test data for multiple calls" + cstr = Zstd.compress(str) + stream = Zstd::StreamingDecompress.new + + result = '' + total_consumed = 0 + chunk_size = 5 + + while total_consumed < cstr.length + remaining_data = cstr[total_consumed..-1] + chunk = remaining_data[0, chunk_size] + + result_array = stream.decompress_with_pos(chunk) + decompressed_chunk = result_array[0] + consumed_bytes = result_array[1] + + result << decompressed_chunk + total_consumed += consumed_bytes + + expect(consumed_bytes).to be > 0 + expect(consumed_bytes).to be <= chunk.length + + # If we consumed less than the chunk size, we might be done or need more data + break if consumed_bytes < chunk.length && total_consumed == cstr.length + end + + expect(result).to eq(str) + expect(total_consumed).to eq(cstr.length) + end + end + describe 'streaming decompress + GC.compact' do it 'shoud work' do # str = SecureRandom.hex(150) @@ -109,4 +174,3 @@ end end end - diff --git a/spec/zstd-ruby-using-dict_spec.rb b/spec/zstd-ruby-using-dict_spec.rb index b946567..34226ee 100644 --- a/spec/zstd-ruby-using-dict_spec.rb +++ b/spec/zstd-ruby-using-dict_spec.rb @@ -88,49 +88,4 @@ end end - describe 'compress_using_dict' do - let(:user_json) do - File.read("#{__dir__}/user_springmt.json") - end - let(:dictionary) do - File.read("#{__dir__}/dictionary") - end - - it 'should work' do - compressed_using_dict = Zstd.compress_using_dict(user_json, dictionary) - compressed = Zstd.compress(user_json) - expect(compressed_using_dict.length).to be < compressed.length - expect(user_json).to eq(Zstd.decompress_using_dict(compressed_using_dict, dictionary)) - end - - it 'should work with simple string' do - compressed_using_dict = Zstd.compress_using_dict("abc", dictionary) - expect("abc").to eq(Zstd.decompress_using_dict(compressed_using_dict, dictionary)) - end - - it 'should work with blank input' do - compressed_using_dict = Zstd.compress_using_dict("", dictionary) - expect("").to eq(Zstd.decompress_using_dict(compressed_using_dict, dictionary)) - end - - it 'should work with blank dictionary' do - compressed_using_dict = Zstd.compress_using_dict(user_json, "") - expect(user_json).to eq(Zstd.decompress_using_dict(compressed_using_dict, "")) - expect(user_json).to eq(Zstd.decompress(compressed_using_dict)) - end - - it 'should support compression levels' do - compressed_using_dict = Zstd.compress_using_dict(user_json, dictionary) - compressed_using_dict_10 = Zstd.compress_using_dict(user_json, dictionary, 10) - expect(compressed_using_dict_10.length).to be < compressed_using_dict.length - expect(user_json).to eq(Zstd.decompress_using_dict(compressed_using_dict_10, dictionary)) - end - - it 'should support compression levels with blank dictionary' do - compressed_using_dict_10 = Zstd.compress_using_dict(user_json, "", 10) - expect(user_json).to eq(Zstd.decompress_using_dict(compressed_using_dict_10, "")) - expect(user_json).to eq(Zstd.decompress(compressed_using_dict_10)) - end - end - end diff --git a/spec/zstd-ruby_spec.rb b/spec/zstd-ruby_spec.rb index 69954b9..15cc2c7 100644 --- a/spec/zstd-ruby_spec.rb +++ b/spec/zstd-ruby_spec.rb @@ -24,20 +24,13 @@ expect(compressed).to_not eq(user_json) end - it 'should support compression levels' do - compressed = Zstd.compress(user_json, 1) - expect(compressed).to be_a(String) - expect(compressed).to_not eq(user_json) - end - it 'should support compression keyward args levels' do compressed = Zstd.compress(user_json, level: 1) - compressed_with_arg = Zstd.compress(user_json, 1) compressed_default = Zstd.compress(user_json) expect(compressed).to be_a(String) expect(compressed).to_not eq(user_json) - expect(compressed).to eq(compressed_with_arg) - expect(compressed_default.length).to be < compressed_with_arg.length + expect(compressed).to_not eq(compressed_default) + expect(compressed_default.length).to be < compressed.length end it 'should compress large bytes' do