|
63 | 63 | end
|
64 | 64 | end
|
65 | 65 |
|
| 66 | + describe "rb_enc_strlen" do |
| 67 | + before :each do |
| 68 | + @str = 'こにちわ' # Four codepoints, each encoded as 3 bytes in UTF-8 (12 bytes in total) |
| 69 | + end |
| 70 | + |
| 71 | + it "returns the correct string length for the encoding" do |
| 72 | + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_8).should == 4 |
| 73 | + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::BINARY).should == 12 |
| 74 | + end |
| 75 | + |
| 76 | + it "returns the string length based on a fixed-width encoding's character length, even if the encoding is incompatible" do |
| 77 | + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_16BE).should == 6 |
| 78 | + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_32BE).should == 3 |
| 79 | + end |
| 80 | + |
| 81 | + it "does not consider strings to be NUL-terminated" do |
| 82 | + s = "abc\0def" |
| 83 | + @s.rb_enc_strlen(s, s.bytesize, Encoding::US_ASCII).should == 7 |
| 84 | + @s.rb_enc_strlen(s, s.bytesize, Encoding::UTF_8).should == 7 |
| 85 | + end |
| 86 | + |
| 87 | + describe "handles broken strings" do |
| 88 | + it "combines valid character and invalid character counts in UTF-8" do |
| 89 | + # Only the first 5 bytes are passed: one complete 3-byte character plus 2 bytes of a truncated one. |
| 90 | + # The expected 3 is 1 for the valid character + 2, the byte count of the invalid remainder. |
| 91 | + @s.rb_enc_strlen(@str, 5, Encoding::UTF_8).should == 3 |
| 92 | + end |
| 93 | + |
| 94 | + it "rounds up for fixed-width encodings" do |
| 95 | + @s.rb_enc_strlen(@str, 5, Encoding::UTF_16BE).should == 3 |
| 96 | + @s.rb_enc_strlen(@str, 5, Encoding::UTF_32BE).should == 2 |
| 97 | + @s.rb_enc_strlen(@str, 5, Encoding::BINARY).should == 5 |
| 98 | + end |
| 99 | + end |
| 100 | + end |
| 101 | + |
66 | 102 | describe "rb_enc_find" do
|
67 | 103 | it "returns the encoding of an Encoding" do
|
68 | 104 | @s.rb_enc_find("UTF-8").should == "UTF-8"
|
|
0 commit comments