Skip to content

Commit d758acd

Browse files
committed
Add specs for rb_enc_strlen.
1 parent f29dd26 commit d758acd

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

spec/ruby/optional/capi/encoding_spec.rb

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,42 @@
6363
end
6464
end
6565

66+
# Specs for the C-API function rb_enc_strlen, exercised through the @s wrapper
# object. The wrapper is called as rb_enc_strlen(string, byte_length, encoding).
describe "rb_enc_strlen" do
67+
before :each do
68+
@str = 'こにちわ' # 4 codepoints, each 3 bytes in UTF-8 (12 bytes total)
69+
end
70+
71+
it "returns the correct string length for the encoding" do
72+
# Same 12 bytes: 4 characters when read as UTF-8, 12 when read as raw bytes.
@s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_8).should == 4
73+
@s.rb_enc_strlen(@str, @str.bytesize, Encoding::BINARY).should == 12
74+
end
75+
76+
it "returns the string length based on a fixed-width encoding's character length, even if the encoding is incompatible" do
77+
# The expected values equal the 12-byte length divided by 2 and by 4.
@s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_16BE).should == 6
78+
@s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_32BE).should == 3
79+
end
80+
81+
it "does not consider strings to be NUL-terminated" do
82+
# 7 bytes with an embedded NUL; the full explicit length must be counted.
s = "abc\0def"
83+
@s.rb_enc_strlen(s, s.bytesize, Encoding::US_ASCII).should == 7
84+
@s.rb_enc_strlen(s, s.bytesize, Encoding::UTF_8).should == 7
85+
end
86+
87+
# The cases below pass a byte length of 5, which cuts @str in the middle of
# its second 3-byte character (3 complete bytes + 2 leftover bytes).
describe "handles broken strings" do
88+
it "combines valid character and invalid character counts in UTF-8" do
89+
# The result is 3 because `rb_enc_strlen` counts the first valid character as 1 and then
90+
# adds the byte count of the truncated (invalid) character that follows: 1 + 2.
91+
@s.rb_enc_strlen(@str, 5, Encoding::UTF_8).should == 3
92+
end
93+
94+
it "rounds up for fixed-width encodings" do
95+
# 5 bytes: 5/2 rounds up to 3, 5/4 rounds up to 2; BINARY counts each byte.
@s.rb_enc_strlen(@str, 5, Encoding::UTF_16BE).should == 3
96+
@s.rb_enc_strlen(@str, 5, Encoding::UTF_32BE).should == 2
97+
@s.rb_enc_strlen(@str, 5, Encoding::BINARY).should == 5
98+
end
99+
end
100+
end
101+
66102
describe "rb_enc_find" do
67103
it "returns the encoding of an Encoding" do
68104
@s.rb_enc_find("UTF-8").should == "UTF-8"

spec/ruby/optional/capi/ext/encoding_spec.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,14 @@ static VALUE encoding_spec_rb_enc_codelen(VALUE self, VALUE code, VALUE encoding
301301
return INT2FIX(rb_enc_codelen(c, enc));
302302
}
303303

304+
/*
 * Spec wrapper around rb_enc_strlen().
 *
 *   str      - Ruby String whose bytes are measured
 *   length   - Fixnum number of bytes, counted from the start of str
 *   encoding - value converted to an rb_encoding* with rb_to_encoding()
 *
 * Returns the result of rb_enc_strlen() as a Fixnum.
 *
 * NOTE(review): length is not checked against the string's actual byte
 * length here; callers are presumably expected to pass a value no larger
 * than str.bytesize -- confirm before reusing outside the specs.
 */
static VALUE encoding_spec_rb_enc_strlen(VALUE self, VALUE str, VALUE length, VALUE encoding) {
305+
int l = FIX2INT(length);
306+
char *p = RSTRING_PTR(str);
307+
/* End pointer: l bytes past the start of the string's buffer. */
char *e = p + l;
308+
309+
return LONG2FIX(rb_enc_strlen(p, e, rb_to_encoding(encoding)));
310+
}
311+
304312
void Init_encoding_spec(void) {
305313
VALUE cls;
306314
native_rb_encoding_pointer = (rb_encoding**) malloc(sizeof(rb_encoding*));
@@ -335,6 +343,7 @@ void Init_encoding_spec(void) {
335343
rb_define_method(cls, "rb_enc_compatible", encoding_spec_rb_enc_compatible, 2);
336344
rb_define_method(cls, "rb_enc_copy", encoding_spec_rb_enc_copy, 2);
337345
rb_define_method(cls, "rb_enc_codelen", encoding_spec_rb_enc_codelen, 2);
346+
rb_define_method(cls, "rb_enc_strlen", encoding_spec_rb_enc_strlen, 3);
338347
rb_define_method(cls, "rb_enc_find", encoding_spec_rb_enc_find, 1);
339348
rb_define_method(cls, "rb_enc_find_index", encoding_spec_rb_enc_find_index, 1);
340349
rb_define_method(cls, "rb_enc_isalnum", encoding_spec_rb_enc_isalnum, 2);

0 commit comments

Comments
 (0)