|
1 | 1 | require "spec" |
2 | 2 | require "char/reader" |
3 | 3 |
|
# Diff rows for the spec helper `assert_invalid_byte_sequence`: rows marked `-` are the
# removed version, rows marked `+` the added version, unmarked rows are unchanged context.
#
# The helper builds a Char::Reader over a String created from `bytes` and expects
# the reader's initial state to be: current_char == Char::REPLACEMENT,
# current_char_width == 1, and error == bytes[0] (the first input byte).
#
# The added version accepts named-only `file` / `line` arguments (defaulting to
# __FILE__ / __LINE__) and passes them to every `should` call.
# NOTE(review): presumably this makes a failure point at the calling example rather
# than at this helper -- confirm against the spec library's `should` documentation.
4 | | -private def assert_invalid_byte_sequence(bytes) |
| 4 | +private def assert_invalid_byte_sequence(bytes, *, file = __FILE__, line = __LINE__) |
5 | 5 | reader = Char::Reader.new(String.new bytes) |
6 | | - reader.current_char.should eq(Char::REPLACEMENT) |
7 | | - reader.current_char_width.should eq(1) |
8 | | - reader.error.should eq(bytes[0]) |
| 6 | + reader.current_char.should eq(Char::REPLACEMENT), file: file, line: line |
| 7 | + reader.current_char_width.should eq(1), file: file, line: line |
| 8 | + reader.error.should eq(bytes[0]), file: file, line: line |
| 9 | +end |
| 10 | + |
# Added spec helper (all rows are `+` rows in this diff).
#
# Builds a String from `bytes`, creates a Char::Reader positioned at `bytes.size`
# (one past the last byte), and calls `previous_char` once. It then expects that
# single backward step to have consumed the whole input as one character:
#   - current_char equals str[0]
#   - current_char_width equals bytes.size
#   - pos equals 0
#   - error is nil
#
# `file` / `line` are named-only, default to __FILE__ / __LINE__, and are passed
# to every `should` call.
# NOTE(review): presumably so failures are attributed to the caller -- confirm.
| 11 | +private def assert_reads_at_end(bytes, *, file = __FILE__, line = __LINE__) |
| 12 | + str = String.new bytes |
| 13 | + reader = Char::Reader.new(str, pos: bytes.size) |
| 14 | + reader.previous_char |
| 15 | + reader.current_char.should eq(str[0]), file: file, line: line |
| 16 | + reader.current_char_width.should eq(bytes.size), file: file, line: line |
| 17 | + reader.pos.should eq(0), file: file, line: line |
| 18 | + reader.error.should be_nil, file: file, line: line |
| 19 | +end |
| 20 | + |
# Added spec helper; the final `end` row is unchanged context that now closes this
# method in the new file.
#
# Builds a String from `bytes`, creates a Char::Reader positioned at `bytes.size`
# (one past the last byte), and calls `previous_char` once. It then expects the
# reader to have stepped back exactly one byte and flagged it as invalid:
#   - current_char equals Char::REPLACEMENT
#   - current_char_width equals 1
#   - pos equals bytes.size - 1
#   - error equals bytes[-1] (the last input byte)
#
# `file` / `line` are named-only, default to __FILE__ / __LINE__, and are passed
# to every `should` call.
# NOTE(review): presumably so failures are attributed to the caller -- confirm.
| 21 | +private def assert_invalid_byte_sequence_at_end(bytes, *, file = __FILE__, line = __LINE__) |
| 22 | + str = String.new bytes |
| 23 | + reader = Char::Reader.new(str, pos: bytes.size) |
| 24 | + reader.previous_char |
| 25 | + reader.current_char.should eq(Char::REPLACEMENT), file: file, line: line |
| 26 | + reader.current_char_width.should eq(1), file: file, line: line |
| 27 | + reader.pos.should eq(bytes.size - 1), file: file, line: line |
| 28 | + reader.error.should eq(bytes[-1]), file: file, line: line |
9 | 29 | end |
10 | 30 |
|
11 | 31 | describe "Char::Reader" do |
@@ -242,4 +262,112 @@ describe "Char::Reader" do |
# Unchanged context rows. The input has only three bytes (0xf4, 0x8f, 0xa0), so, as
# the example name says, there is no fourth byte for the reader to consume.
242 | 262 | it "errors if fourth_byte is out of bounds" do |
243 | 263 | assert_invalid_byte_sequence Bytes[0xf4, 0x8f, 0xa0] |
244 | 264 | end |
| 265 | + |
# Added examples for `#previous_char`. Every case hands a literal byte sequence to
# one of two helpers: `assert_reads_at_end` in the first example and
# `assert_invalid_byte_sequence_at_end` in the second. The comments below only
# group the cases by input length and second byte, as visible in the literals.
| 266 | + describe "#previous_char" do |
| 267 | + it "reads on valid UTF-8" do |
# One-byte inputs.
| 268 | + assert_reads_at_end Bytes[0x00] |
| 269 | + assert_reads_at_end Bytes[0x7f] |
| 270 | + |
# Two-byte inputs.
| 271 | + assert_reads_at_end Bytes[0xc2, 0x80] |
| 272 | + assert_reads_at_end Bytes[0xc2, 0xbf] |
| 273 | + assert_reads_at_end Bytes[0xdf, 0x80] |
| 274 | + assert_reads_at_end Bytes[0xdf, 0xbf] |
| 275 | + |
# Three-byte inputs whose second byte is 0x80 or 0x9f.
| 276 | + assert_reads_at_end Bytes[0xe1, 0x80, 0x80] |
| 277 | + assert_reads_at_end Bytes[0xe1, 0x80, 0xbf] |
| 278 | + assert_reads_at_end Bytes[0xe1, 0x9f, 0x80] |
| 279 | + assert_reads_at_end Bytes[0xe1, 0x9f, 0xbf] |
| 280 | + assert_reads_at_end Bytes[0xed, 0x80, 0x80] |
| 281 | + assert_reads_at_end Bytes[0xed, 0x80, 0xbf] |
| 282 | + assert_reads_at_end Bytes[0xed, 0x9f, 0x80] |
| 283 | + assert_reads_at_end Bytes[0xed, 0x9f, 0xbf] |
| 284 | + assert_reads_at_end Bytes[0xef, 0x80, 0x80] |
| 285 | + assert_reads_at_end Bytes[0xef, 0x80, 0xbf] |
| 286 | + assert_reads_at_end Bytes[0xef, 0x9f, 0x80] |
| 287 | + assert_reads_at_end Bytes[0xef, 0x9f, 0xbf] |
| 288 | + |
# Three-byte inputs whose second byte is 0xa0 or 0xbf.
| 289 | + assert_reads_at_end Bytes[0xe0, 0xa0, 0x80] |
| 290 | + assert_reads_at_end Bytes[0xe0, 0xa0, 0xbf] |
| 291 | + assert_reads_at_end Bytes[0xe0, 0xbf, 0x80] |
| 292 | + assert_reads_at_end Bytes[0xe0, 0xbf, 0xbf] |
| 293 | + assert_reads_at_end Bytes[0xe1, 0xa0, 0x80] |
| 294 | + assert_reads_at_end Bytes[0xe1, 0xa0, 0xbf] |
| 295 | + assert_reads_at_end Bytes[0xe1, 0xbf, 0x80] |
| 296 | + assert_reads_at_end Bytes[0xe1, 0xbf, 0xbf] |
| 297 | + assert_reads_at_end Bytes[0xef, 0xa0, 0x80] |
| 298 | + assert_reads_at_end Bytes[0xef, 0xa0, 0xbf] |
| 299 | + assert_reads_at_end Bytes[0xef, 0xbf, 0x80] |
| 300 | + assert_reads_at_end Bytes[0xef, 0xbf, 0xbf] |
| 301 | + |
# Four-byte inputs whose second byte is 0x80 or 0x8f.
| 302 | + assert_reads_at_end Bytes[0xf1, 0x80, 0x80, 0x80] |
| 303 | + assert_reads_at_end Bytes[0xf1, 0x8f, 0x80, 0x80] |
| 304 | + assert_reads_at_end Bytes[0xf4, 0x80, 0x80, 0x80] |
| 305 | + assert_reads_at_end Bytes[0xf4, 0x8f, 0x80, 0x80] |
| 306 | + |
# Four-byte inputs whose second byte is 0x90 or 0xbf.
| 307 | + assert_reads_at_end Bytes[0xf0, 0x90, 0x80, 0x80] |
| 308 | + assert_reads_at_end Bytes[0xf0, 0xbf, 0x80, 0x80] |
| 309 | + assert_reads_at_end Bytes[0xf3, 0x90, 0x80, 0x80] |
| 310 | + assert_reads_at_end Bytes[0xf3, 0xbf, 0x80, 0x80] |
| 311 | + end |
| 312 | + |
| 313 | + it "errors on invalid UTF-8" do |
# One-byte inputs.
| 314 | + assert_invalid_byte_sequence_at_end Bytes[0x80] |
| 315 | + assert_invalid_byte_sequence_at_end Bytes[0xbf] |
| 316 | + assert_invalid_byte_sequence_at_end Bytes[0xc0] |
| 317 | + assert_invalid_byte_sequence_at_end Bytes[0xff] |
| 318 | + |
# Two-byte inputs, all ending in 0x80, with varying first bytes.
| 319 | + assert_invalid_byte_sequence_at_end Bytes[0x00, 0x80] |
| 320 | + assert_invalid_byte_sequence_at_end Bytes[0x7f, 0x80] |
| 321 | + assert_invalid_byte_sequence_at_end Bytes[0x80, 0x80] |
| 322 | + assert_invalid_byte_sequence_at_end Bytes[0x9f, 0x80] |
| 323 | + assert_invalid_byte_sequence_at_end Bytes[0xa0, 0x80] |
| 324 | + assert_invalid_byte_sequence_at_end Bytes[0xbf, 0x80] |
| 325 | + assert_invalid_byte_sequence_at_end Bytes[0xc0, 0x80] |
| 326 | + assert_invalid_byte_sequence_at_end Bytes[0xc1, 0x80] |
| 327 | + assert_invalid_byte_sequence_at_end Bytes[0xe0, 0x80] |
| 328 | + assert_invalid_byte_sequence_at_end Bytes[0xff, 0x80] |
| 329 | + |
# Three-byte inputs whose second byte is 0x80 or 0x9f.
| 330 | + assert_invalid_byte_sequence_at_end Bytes[0x00, 0x80, 0x80] |
| 331 | + assert_invalid_byte_sequence_at_end Bytes[0x7f, 0x80, 0x80] |
| 332 | + assert_invalid_byte_sequence_at_end Bytes[0x80, 0x80, 0x80] |
| 333 | + assert_invalid_byte_sequence_at_end Bytes[0x8f, 0x80, 0x80] |
| 334 | + assert_invalid_byte_sequence_at_end Bytes[0x90, 0x80, 0x80] |
| 335 | + assert_invalid_byte_sequence_at_end Bytes[0xbf, 0x80, 0x80] |
| 336 | + assert_invalid_byte_sequence_at_end Bytes[0xc0, 0x80, 0x80] |
| 337 | + assert_invalid_byte_sequence_at_end Bytes[0xc1, 0x80, 0x80] |
| 338 | + assert_invalid_byte_sequence_at_end Bytes[0xc2, 0x80, 0x80] |
| 339 | + assert_invalid_byte_sequence_at_end Bytes[0xdf, 0x80, 0x80] |
| 340 | + assert_invalid_byte_sequence_at_end Bytes[0xe0, 0x80, 0x80] |
| 341 | + assert_invalid_byte_sequence_at_end Bytes[0xe0, 0x9f, 0xbf] |
| 342 | + assert_invalid_byte_sequence_at_end Bytes[0xf0, 0x80, 0x80] |
| 343 | + assert_invalid_byte_sequence_at_end Bytes[0xff, 0x80, 0x80] |
| 344 | + |
# Three-byte inputs whose second byte is 0xa0 or 0xbf.
| 345 | + assert_invalid_byte_sequence_at_end Bytes[0x00, 0xa0, 0x80] |
| 346 | + assert_invalid_byte_sequence_at_end Bytes[0x7f, 0xa0, 0x80] |
| 347 | + assert_invalid_byte_sequence_at_end Bytes[0x80, 0xa0, 0x80] |
| 348 | + assert_invalid_byte_sequence_at_end Bytes[0x8f, 0xa0, 0x80] |
| 349 | + assert_invalid_byte_sequence_at_end Bytes[0x90, 0xa0, 0x80] |
| 350 | + assert_invalid_byte_sequence_at_end Bytes[0xbf, 0xa0, 0x80] |
| 351 | + assert_invalid_byte_sequence_at_end Bytes[0xc0, 0xa0, 0x80] |
| 352 | + assert_invalid_byte_sequence_at_end Bytes[0xc1, 0xa0, 0x80] |
| 353 | + assert_invalid_byte_sequence_at_end Bytes[0xc2, 0xa0, 0x80] |
| 354 | + assert_invalid_byte_sequence_at_end Bytes[0xdf, 0xa0, 0x80] |
| 355 | + assert_invalid_byte_sequence_at_end Bytes[0xed, 0xa0, 0x80] |
| 356 | + assert_invalid_byte_sequence_at_end Bytes[0xed, 0xbf, 0xbf] |
| 357 | + assert_invalid_byte_sequence_at_end Bytes[0xf0, 0xa0, 0x80] |
| 358 | + assert_invalid_byte_sequence_at_end Bytes[0xff, 0xa0, 0x80] |
| 359 | + |
# Four-byte inputs whose second byte is 0x80.
| 360 | + assert_invalid_byte_sequence_at_end Bytes[0x00, 0x80, 0x80, 0x80] |
| 361 | + assert_invalid_byte_sequence_at_end Bytes[0xef, 0x80, 0x80, 0x80] |
| 362 | + assert_invalid_byte_sequence_at_end Bytes[0xf0, 0x80, 0x80, 0x80] |
| 363 | + assert_invalid_byte_sequence_at_end Bytes[0xf5, 0x80, 0x80, 0x80] |
| 364 | + assert_invalid_byte_sequence_at_end Bytes[0xff, 0x80, 0x80, 0x80] |
| 365 | + |
# Four-byte inputs whose second byte is 0x90.
| 366 | + assert_invalid_byte_sequence_at_end Bytes[0x00, 0x90, 0x80, 0x80] |
| 367 | + assert_invalid_byte_sequence_at_end Bytes[0xef, 0x90, 0x80, 0x80] |
| 368 | + assert_invalid_byte_sequence_at_end Bytes[0xf4, 0x90, 0x80, 0x80] |
| 369 | + assert_invalid_byte_sequence_at_end Bytes[0xf5, 0x90, 0x80, 0x80] |
| 370 | + assert_invalid_byte_sequence_at_end Bytes[0xff, 0x90, 0x80, 0x80] |
| 371 | + end |
| 372 | + end |
245 | 373 | end |
0 commit comments