Skip to content
Merged

V2 #109

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,20 @@ Fork from https://github.com/jarredholman/ruby-zstd.
## Zstd version
[v1.5.7](https://github.com/facebook/zstd/tree/v1.5.7)

## Versioning Policy

Starting from v2.0.0, this gem follows Semantic Versioning.

- **Major version** (X.0.0): Breaking changes to the API
- **Minor version** (X.Y.0): New features, including Zstd library version updates
- **Patch version** (X.Y.Z): Bug fixes and other backward-compatible changes

### Zstd Library Updates

Updates to the underlying Zstd library version will be released as **minor version** updates, as they may introduce new features or performance improvements while maintaining backward compatibility.

**Note**: Versions prior to v2.0.0 followed the Zstd library versioning scheme with an additional patch number (e.g., 1.5.6.2). This approach has been replaced with semantic versioning to provide clearer expectations for API stability.

## Installation

Add this line to your application's Gemfile:
Expand Down Expand Up @@ -155,6 +169,20 @@ result << stream.decompress(cstr[10..-1])

DDict can also be specified to `dict:`.

#### Streaming Decompression with Position Tracking

If you need to know how much of the input data was consumed during decompression, you can use the `decompress_with_pos` method:

```ruby
cstr = "" # Compressed data
stream = Zstd::StreamingDecompress.new
result, consumed_bytes = stream.decompress_with_pos(cstr[0, 10])
# result contains the decompressed data
# consumed_bytes contains the number of bytes from input that were processed
```

This is particularly useful when processing streaming data where you need to track the exact position in the input stream.

### Skippable frame

```ruby
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ DEPENDENCIES
snappy

BUNDLED WITH
2.5.7
2.5.9
28 changes: 15 additions & 13 deletions examples/sinatra/Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,32 +1,34 @@
PATH
remote: ../..
specs:
zstd-ruby (1.5.6.2)
zstd-ruby (2.0.0.pre.preview1)

GEM
remote: https://rubygems.org/
specs:
base64 (0.2.0)
mustermann (3.0.0)
base64 (0.3.0)
logger (1.7.0)
mustermann (3.0.3)
ruby2_keywords (~> 0.0.1)
rack (3.0.10)
rack-protection (4.0.0)
rack (3.2.0)
rack-protection (4.1.1)
base64 (>= 0.1.0)
logger (>= 1.6.0)
rack (>= 3.0.0, < 4)
rack-session (2.0.0)
rack-session (2.1.1)
base64 (>= 0.1.0)
rack (>= 3.0.0)
rackup (2.1.0)
rackup (2.2.1)
rack (>= 3)
webrick (~> 1.8)
ruby2_keywords (0.0.5)
sinatra (4.0.0)
sinatra (4.1.1)
logger (>= 1.6.0)
mustermann (~> 3.0)
rack (>= 3.0.0, < 4)
rack-protection (= 4.0.0)
rack-protection (= 4.1.1)
rack-session (>= 2.0.0, < 3)
tilt (~> 2.0)
tilt (2.3.0)
webrick (1.8.1)
tilt (2.6.1)

PLATFORMS
arm64-darwin-21
Expand All @@ -38,4 +40,4 @@ DEPENDENCIES
zstd-ruby!

BUNDLED WITH
2.5.7
2.5.9
5 changes: 1 addition & 4 deletions ext/zstdruby/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ static int convert_compression_level(VALUE compression_level_value)
return NUM2INT(compression_level_value);
}

static void set_compress_params(ZSTD_CCtx* const ctx, VALUE level_from_args, VALUE kwargs)
static void set_compress_params(ZSTD_CCtx* const ctx, VALUE kwargs)
{
ID kwargs_keys[2];
kwargs_keys[0] = rb_intern("level");
Expand All @@ -29,9 +29,6 @@ static void set_compress_params(ZSTD_CCtx* const ctx, VALUE level_from_args, VAL
int compression_level = ZSTD_CLEVEL_DEFAULT;
if (kwargs_values[0] != Qundef && kwargs_values[0] != Qnil) {
compression_level = convert_compression_level(kwargs_values[0]);
} else if (!NIL_P(level_from_args)) {
rb_warn("`level` in args is deprecated; use keyword args `level:` instead.");
compression_level = convert_compression_level(level_from_args);
}
ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, compression_level);

Expand Down
5 changes: 2 additions & 3 deletions ext/zstdruby/streaming_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ static VALUE
rb_streaming_compress_initialize(int argc, VALUE *argv, VALUE obj)
{
VALUE kwargs;
VALUE compression_level_value;
rb_scan_args(argc, argv, "01:", &compression_level_value, &kwargs);
rb_scan_args(argc, argv, "00:", &kwargs);

struct streaming_compress_t* sc;
TypedData_Get_Struct(obj, struct streaming_compress_t, &streaming_compress_type, sc);
Expand All @@ -82,7 +81,7 @@ rb_streaming_compress_initialize(int argc, VALUE *argv, VALUE obj)
if (ctx == NULL) {
rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx error");
}
set_compress_params(ctx, compression_level_value, kwargs);
set_compress_params(ctx, kwargs);

sc->ctx = ctx;
sc->buf = rb_str_new(NULL, buffOutSize);
Expand Down
22 changes: 22 additions & 0 deletions ext/zstdruby/streaming_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,27 @@ rb_streaming_decompress_decompress(VALUE obj, VALUE src)
return result;
}

static VALUE
rb_streaming_decompress_decompress_with_pos(VALUE obj, VALUE src)
{
StringValue(src);
const char* input_data = RSTRING_PTR(src);
size_t input_size = RSTRING_LEN(src);
ZSTD_inBuffer input = { input_data, input_size, 0 };

struct streaming_decompress_t* sd;
TypedData_Get_Struct(obj, struct streaming_decompress_t, &streaming_decompress_type, sd);
const char* output_data = RSTRING_PTR(sd->buf);
VALUE result = rb_str_new(0, 0);
ZSTD_outBuffer output = { (void*)output_data, sd->buf_size, 0 };
size_t const ret = zstd_stream_decompress(sd->dctx, &output, &input, false);
if (ZSTD_isError(ret)) {
rb_raise(rb_eRuntimeError, "decompress error error code: %s", ZSTD_getErrorName(ret));
}
rb_str_cat(result, output.dst, output.pos);
return rb_ary_new_from_args(2, result, ULONG2NUM(input.pos));
}

extern VALUE rb_mZstd, cStreamingDecompress;
void
zstd_ruby_streaming_decompress_init(void)
Expand All @@ -121,4 +142,5 @@ zstd_ruby_streaming_decompress_init(void)
rb_define_alloc_func(cStreamingDecompress, rb_streaming_decompress_allocate);
rb_define_method(cStreamingDecompress, "initialize", rb_streaming_decompress_initialize, -1);
rb_define_method(cStreamingDecompress, "decompress", rb_streaming_decompress_decompress, 1);
rb_define_method(cStreamingDecompress, "decompress_with_pos", rb_streaming_decompress_decompress_with_pos, 1);
}
105 changes: 2 additions & 103 deletions ext/zstdruby/zstdruby.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,15 @@ static VALUE zstdVersion(VALUE self)
static VALUE rb_compress(int argc, VALUE *argv, VALUE self)
{
VALUE input_value;
VALUE compression_level_value;
VALUE kwargs;
rb_scan_args(argc, argv, "11:", &input_value, &compression_level_value, &kwargs);
rb_scan_args(argc, argv, "10:", &input_value, &kwargs);

ZSTD_CCtx* const ctx = ZSTD_createCCtx();
if (ctx == NULL) {
rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx error");
}

set_compress_params(ctx, compression_level_value, kwargs);
set_compress_params(ctx, kwargs);

StringValue(input_value);
char* input_data = RSTRING_PTR(input_value);
Expand All @@ -40,51 +39,6 @@ static VALUE rb_compress(int argc, VALUE *argv, VALUE self)
return output;
}

static VALUE rb_compress_using_dict(int argc, VALUE *argv, VALUE self)
{
rb_warn("Zstd.compress_using_dict is deprecated; use Zstd.compress with `dict:` instead.");
VALUE input_value;
VALUE dict;
VALUE compression_level_value;
rb_scan_args(argc, argv, "21", &input_value, &dict, &compression_level_value);
int compression_level = convert_compression_level(compression_level_value);

StringValue(input_value);
char* input_data = RSTRING_PTR(input_value);
size_t input_size = RSTRING_LEN(input_value);
size_t max_compressed_size = ZSTD_compressBound(input_size);

char* dict_buffer = RSTRING_PTR(dict);
size_t dict_size = RSTRING_LEN(dict);

ZSTD_CDict* const cdict = ZSTD_createCDict(dict_buffer, dict_size, compression_level);
if (cdict == NULL) {
rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCDict failed");
}
ZSTD_CCtx* const ctx = ZSTD_createCCtx();
if (ctx == NULL) {
ZSTD_freeCDict(cdict);
rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx failed");
}

VALUE output = rb_str_new(NULL, max_compressed_size);
char* output_data = RSTRING_PTR(output);
size_t const compressed_size = ZSTD_compress_usingCDict(ctx, (void*)output_data, max_compressed_size,
(void*)input_data, input_size, cdict);

if (ZSTD_isError(compressed_size)) {
ZSTD_freeCDict(cdict);
ZSTD_freeCCtx(ctx);
rb_raise(rb_eRuntimeError, "%s: %s", "compress failed", ZSTD_getErrorName(compressed_size));
}

rb_str_resize(output, compressed_size);
ZSTD_freeCDict(cdict);
ZSTD_freeCCtx(ctx);
return output;
}


static VALUE decompress_buffered(ZSTD_DCtx* dctx, const char* input_data, size_t input_size)
{
ZSTD_inBuffer input = { input_data, input_size, 0 };
Expand Down Expand Up @@ -142,59 +96,6 @@ static VALUE rb_decompress(int argc, VALUE *argv, VALUE self)
return output;
}

static VALUE rb_decompress_using_dict(int argc, VALUE *argv, VALUE self)
{
rb_warn("Zstd.decompress_using_dict is deprecated; use Zstd.decompress with `dict:` instead.");
VALUE input_value;
VALUE dict;
rb_scan_args(argc, argv, "20", &input_value, &dict);

StringValue(input_value);
char* input_data = RSTRING_PTR(input_value);
size_t input_size = RSTRING_LEN(input_value);

char* dict_buffer = RSTRING_PTR(dict);
size_t dict_size = RSTRING_LEN(dict);
ZSTD_DDict* const ddict = ZSTD_createDDict(dict_buffer, dict_size);
if (ddict == NULL) {
rb_raise(rb_eRuntimeError, "%s", "ZSTD_createDDict failed");
}
unsigned const expected_dict_id = ZSTD_getDictID_fromDDict(ddict);
unsigned const actual_dict_id = ZSTD_getDictID_fromFrame(input_data, input_size);
if (expected_dict_id != actual_dict_id) {
ZSTD_freeDDict(ddict);
rb_raise(rb_eRuntimeError, "DictID mismatch");
}

ZSTD_DCtx* const ctx = ZSTD_createDCtx();
if (ctx == NULL) {
ZSTD_freeDDict(ddict);
rb_raise(rb_eRuntimeError, "%s", "ZSTD_createDCtx failed");
}

unsigned long long const uncompressed_size = ZSTD_getFrameContentSize(input_data, input_size);
if (uncompressed_size == ZSTD_CONTENTSIZE_ERROR) {
ZSTD_freeDDict(ddict);
ZSTD_freeDCtx(ctx);
rb_raise(rb_eRuntimeError, "%s: %s", "not compressed by zstd", ZSTD_getErrorName(uncompressed_size));
}
if (uncompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) {
return decompress_buffered(ctx, input_data, input_size);
}

VALUE output = rb_str_new(NULL, uncompressed_size);
char* output_data = RSTRING_PTR(output);
size_t const decompress_size = ZSTD_decompress_usingDDict(ctx, output_data, uncompressed_size, input_data, input_size, ddict);
if (ZSTD_isError(decompress_size)) {
ZSTD_freeDDict(ddict);
ZSTD_freeDCtx(ctx);
rb_raise(rb_eRuntimeError, "%s: %s", "decompress error", ZSTD_getErrorName(decompress_size));
}
ZSTD_freeDDict(ddict);
ZSTD_freeDCtx(ctx);
return output;
}

static void free_cdict(void *dict)
{
ZSTD_freeCDict(dict);
Expand Down Expand Up @@ -284,9 +185,7 @@ zstd_ruby_init(void)
{
rb_define_module_function(rb_mZstd, "zstd_version", zstdVersion, 0);
rb_define_module_function(rb_mZstd, "compress", rb_compress, -1);
rb_define_module_function(rb_mZstd, "compress_using_dict", rb_compress_using_dict, -1);
rb_define_module_function(rb_mZstd, "decompress", rb_decompress, -1);
rb_define_module_function(rb_mZstd, "decompress_using_dict", rb_decompress_using_dict, -1);

rb_define_alloc_func(rb_cCDict, rb_cdict_alloc);
rb_define_private_method(rb_cCDict, "initialize", rb_cdict_initialize, -1);
Expand Down
2 changes: 1 addition & 1 deletion lib/zstd-ruby/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module Zstd
VERSION = "1.5.7.0"
VERSION = "2.0.0-preview1"
end
66 changes: 65 additions & 1 deletion spec/zstd-ruby-streaming-decompress_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,71 @@
end
end

describe 'decompress_with_pos' do
it 'should return decompressed data and consumed input position' do
str = "hello world test data"
cstr = Zstd.compress(str)
stream = Zstd::StreamingDecompress.new

# Test with partial input
result_array = stream.decompress_with_pos(cstr[0, 10])
expect(result_array).to be_an(Array)
expect(result_array.length).to eq(2)

decompressed_data = result_array[0]
consumed_bytes = result_array[1]

expect(decompressed_data).to be_a(String)
expect(consumed_bytes).to be_a(Integer)
expect(consumed_bytes).to be > 0
expect(consumed_bytes).to be <= 10
end

it 'should work with complete compressed data' do
str = "foo bar buzz"
cstr = Zstd.compress(str)
stream = Zstd::StreamingDecompress.new

result_array = stream.decompress_with_pos(cstr)
decompressed_data = result_array[0]
consumed_bytes = result_array[1]

expect(decompressed_data).to eq(str)
expect(consumed_bytes).to eq(cstr.length)
end

it 'should work with multiple calls' do
str = "test data for multiple calls"
cstr = Zstd.compress(str)
stream = Zstd::StreamingDecompress.new

result = ''
total_consumed = 0
chunk_size = 5

while total_consumed < cstr.length
remaining_data = cstr[total_consumed..-1]
chunk = remaining_data[0, chunk_size]

result_array = stream.decompress_with_pos(chunk)
decompressed_chunk = result_array[0]
consumed_bytes = result_array[1]

result << decompressed_chunk
total_consumed += consumed_bytes

expect(consumed_bytes).to be > 0
expect(consumed_bytes).to be <= chunk.length

# If we consumed less than the chunk size, we might be done or need more data
break if consumed_bytes < chunk.length && total_consumed == cstr.length
end

expect(result).to eq(str)
expect(total_consumed).to eq(cstr.length)
end
end

describe 'streaming decompress + GC.compact' do
it 'shoud work' do
# str = SecureRandom.hex(150)
Expand Down Expand Up @@ -109,4 +174,3 @@
end
end
end

Loading