Skip to content

Commit d57a012

Browse files
authored
Merge pull request #5 from casperisfine/set-batch-size
Allow to configure load_many batch size
2 parents e0d49ee + d0bcec7 commit d57a012

File tree

7 files changed: 65 additions and 21 deletions.

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@
66
/pkg/
77
/spec/reports/
88
/tmp/
9+
*.so
10+
*.bundle

Gemfile.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
PATH
22
remote: .
33
specs:
4-
fast_jsonparser (0.2.0)
4+
fast_jsonparser (0.3.0)
55

66
GEM
77
remote: https://rubygems.org/

Rakefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ require "rake/testtask"
33
require "rake/extensiontask"
44

55
Rake::ExtensionTask.new("fast_jsonparser") do |ext|
6+
ext.ext_dir = 'ext/fast_jsonparser'
67
ext.lib_dir = "lib/fast_jsonparser"
78
end
89

ext/fast_jsonparser/fast_jsonparser.cpp

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@
22

33
#include "simdjson.h"
44

5-
VALUE rb_mFastJsonparser;
6-
7-
VALUE rb_eFastJsonparserParseError;
5+
VALUE rb_eFastJsonparserUnknownError, rb_eFastJsonparserParseError;
86

97
using namespace simdjson;
108

@@ -93,36 +91,44 @@ static VALUE rb_fast_jsonparser_load(VALUE self, VALUE arg)
9391
return Qnil;
9492
}
9593

96-
static VALUE rb_fast_jsonparser_load_many(VALUE self, VALUE arg)
94+
static VALUE rb_fast_jsonparser_load_many(VALUE self, VALUE arg, VALUE batch_size)
9795
{
9896
Check_Type(arg, T_STRING);
97+
Check_Type(batch_size, T_FIXNUM);
9998

100-
dom::parser parser;
101-
auto [docs, error] = parser.load_many(RSTRING_PTR(arg));
102-
if (error == SUCCESS)
103-
{
104-
for (dom::element doc : docs)
99+
try {
100+
dom::parser parser;
101+
auto [docs, error] = parser.load_many(RSTRING_PTR(arg), FIX2INT(batch_size));
102+
if (error == SUCCESS)
105103
{
106-
if (rb_block_given_p())
104+
for (dom::element doc : docs)
107105
{
108106
rb_yield(make_ruby_object(doc));
109107
}
108+
return Qnil;
110109
}
110+
rb_raise(rb_eFastJsonparserParseError, "parse error");
111111
return Qnil;
112+
} catch (simdjson::simdjson_error error) {
113+
rb_raise(rb_eFastJsonparserUnknownError, "%s", error.what());
112114
}
113-
rb_raise(rb_eFastJsonparserParseError, "parse error");
114-
return Qnil;
115115
}
116116

117117
extern "C"
118118
{
119119

120120
void Init_fast_jsonparser(void)
121121
{
122-
rb_mFastJsonparser = rb_define_module("FastJsonparser");
123-
rb_eFastJsonparserParseError = rb_define_class_under(rb_mFastJsonparser, "ParseError", rb_eStandardError);
122+
VALUE rb_mFastJsonparser = rb_const_get(rb_cObject, rb_intern("FastJsonparser"));
123+
124124
rb_define_module_function(rb_mFastJsonparser, "parse", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_parse), 1);
125125
rb_define_module_function(rb_mFastJsonparser, "load", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load), 1);
126-
rb_define_module_function(rb_mFastJsonparser, "load_many", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load_many), 1);
126+
rb_define_module_function(rb_mFastJsonparser, "_load_many", reinterpret_cast<VALUE (*)(...)>(rb_fast_jsonparser_load_many), 2);
127+
128+
rb_eFastJsonparserParseError = rb_const_get(rb_mFastJsonparser, rb_intern("ParseError"));
129+
rb_global_variable(&rb_eFastJsonparserParseError);
130+
rb_eFastJsonparserUnknownError = rb_const_get(rb_mFastJsonparser, rb_intern("UnknownError"));
131+
rb_global_variable(&rb_eFastJsonparserUnknownError);
132+
127133
}
128134
}

lib/fast_jsonparser.rb

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,28 @@
1+
# frozen_string_literal: true
2+
13
require "fast_jsonparser/version"
24

35
module FastJsonparser
4-
class Error < StandardError; end
5-
# Your code goes here...
6-
end
6+
Error = Class.new(StandardError)
7+
ParseError = Class.new(Error)
8+
UnknownError = Class.new(Error)
9+
BatchSizeTooSmall = Class.new(Error)
10+
11+
DEFAULT_BATCH_SIZE = 1_000_000 # from include/simdjson/dom/parser.h
712

8-
require "fast_jsonparser/fast_jsonparser" # loads cpp extension
13+
class << self
14+
def load_many(source, batch_size: DEFAULT_BATCH_SIZE, &block)
15+
_load_many(source, batch_size, &block)
16+
rescue UnknownError => error
17+
case error.message
18+
when "This parser can't support a document that big"
19+
raise BatchSizeTooSmall, "One of the documents was bigger than the batch size (#{batch_size}B), try increasing it."
20+
else
21+
raise
22+
end
23+
end
24+
25+
require "fast_jsonparser/fast_jsonparser" # loads cpp extension
26+
private :_load_many
27+
end
28+
end
-224 KB
Binary file not shown.

test/fast_jsonparser_test.rb

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
require "test_helper"
2+
require 'tempfile'
3+
require 'json'
24

35
class FastJsonparserTest < Minitest::Test
46
def test_that_it_has_a_version_number
@@ -17,6 +19,19 @@ def test_json_parse_is_working
1719
end
1820

1921
def test_file_stream_is_working
20-
assert_nil FastJsonparser.load_many('./benchmark/nginx_json_logs.json')
22+
assert_nil FastJsonparser.load_many('./benchmark/nginx_json_logs.json') {}
23+
end
24+
25+
def test_load_many_batch_size
26+
Tempfile.create('documents') do |f|
27+
f.write({foo: "a" * 5_000}.to_json)
28+
f.close
29+
30+
assert_raises FastJsonparser::BatchSizeTooSmall do
31+
FastJsonparser.load_many(f.path, batch_size: 1_000) {}
32+
end
33+
34+
FastJsonparser.load_many(f.path, batch_size: 6_000) {}
35+
end
2136
end
2237
end

0 commit comments

Comments
 (0)