|
11 | 11 |
|
12 | 12 | use crate::__internal::runtime::InnerProtoString; |
13 | 13 | use crate::__internal::{Private, SealedInternal}; |
14 | | -use crate::{ |
15 | | - utf8::Utf8Chunks, AsView, IntoProxied, IntoView, Mut, MutProxied, Optional, Proxied, View, |
16 | | -}; |
| 14 | +use crate::{AsView, IntoProxied, IntoView, Mut, MutProxied, Optional, Proxied, View}; |
17 | 15 | use std::borrow::Cow; |
18 | 16 | use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd}; |
19 | 17 | use std::convert::{AsMut, AsRef}; |
@@ -394,43 +392,6 @@ impl ProtoStr { |
394 | 392 | self.0.len() |
395 | 393 | } |
396 | 394 |
|
397 | | - /// Iterates over the `char`s in this protobuf `string`. |
398 | | - /// |
399 | | - /// Invalid UTF-8 sequences are replaced with |
400 | | - /// [`U+FFFD REPLACEMENT CHARACTER`]. |
401 | | - /// |
402 | | - /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER |
403 | | - pub fn chars(&self) -> impl Iterator<Item = char> + '_ + fmt::Debug { |
404 | | - Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| { |
405 | | - let mut yield_replacement_char = !chunk.invalid().is_empty(); |
406 | | - chunk.valid().chars().chain(iter::from_fn(move || { |
407 | | - // Yield a single replacement character for every |
408 | | - // non-empty invalid sequence. |
409 | | - yield_replacement_char.then(|| { |
410 | | - yield_replacement_char = false; |
411 | | - char::REPLACEMENT_CHARACTER |
412 | | - }) |
413 | | - })) |
414 | | - }) |
415 | | - } |
416 | | - |
417 | | - /// Returns an iterator over chunks of UTF-8 data in the string. |
418 | | - /// |
419 | | - /// An `Ok(&str)` is yielded for every valid UTF-8 chunk, and an |
420 | | - /// `Err(&[u8])` for each non-UTF-8 chunk. An `Err` will be emitted |
421 | | - /// multiple times in a row for contiguous invalid chunks. Each invalid |
422 | | - /// chunk in an `Err` has a maximum length of 3 bytes. |
423 | | - pub fn utf8_chunks(&self) -> impl Iterator<Item = Result<&str, &[u8]>> + '_ { |
424 | | - Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| { |
425 | | - let valid = chunk.valid(); |
426 | | - let invalid = chunk.invalid(); |
427 | | - (!valid.is_empty()) |
428 | | - .then_some(Ok(valid)) |
429 | | - .into_iter() |
430 | | - .chain((!invalid.is_empty()).then_some(Err(invalid))) |
431 | | - }) |
432 | | - } |
433 | | - |
434 | 395 | /// Converts known-UTF-8 bytes to a `ProtoStr` without a check. |
435 | 396 | /// |
436 | 397 | /// # Safety |
@@ -486,20 +447,24 @@ impl<'msg> TryFrom<&'msg [u8]> for &'msg ProtoStr { |
486 | 447 |
|
487 | 448 | impl fmt::Debug for ProtoStr { |
488 | 449 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
489 | | - fmt::Debug::fmt(&Utf8Chunks::new(self.as_bytes()).debug(), f) |
| 450 | + write!(f, "\"")?; |
| 451 | + for chunk in self.as_bytes().utf8_chunks() { |
| 452 | + for ch in chunk.valid().chars() { |
| 453 | + write!(f, "{}", ch.escape_debug())?; |
| 454 | + } |
| 455 | + for byte in chunk.invalid() { |
| 456 | + // Emit each invalid byte as an uppercase hex escape, e.g. \xFF. |
| 457 | + write!(f, "\\x{:02X}", byte)?; |
| 458 | + } |
| 459 | + } |
| 460 | + write!(f, "\"")?; |
| 461 | + Ok(()) |
490 | 462 | } |
491 | 463 | } |
492 | 464 |
|
493 | 465 | impl fmt::Display for ProtoStr { |
494 | 466 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
495 | | - use std::fmt::Write as _; |
496 | | - for chunk in Utf8Chunks::new(self.as_bytes()) { |
497 | | - fmt::Display::fmt(chunk.valid(), f)?; |
498 | | - if !chunk.invalid().is_empty() { |
499 | | - // One invalid chunk is emitted per detected invalid sequence. |
500 | | - f.write_char(char::REPLACEMENT_CHARACTER)?; |
501 | | - } |
502 | | - } |
| 467 | + fmt::Display::fmt(&String::from_utf8_lossy(self.as_bytes()), f)?; |
503 | 468 | Ok(()) |
504 | 469 | } |
505 | 470 | } |
@@ -608,199 +573,4 @@ mod tests { |
608 | 573 | // inside of `ProtoStr`. |
609 | 574 | unsafe { ProtoStr::from_utf8_unchecked(bytes) } |
610 | 575 | } |
611 | | - |
612 | | - // UTF-8 test cases copied from: |
613 | | - // https://github.com/rust-lang/rust/blob/e8ee0b7/library/core/tests/str_lossy.rs |
614 | | - |
615 | | - #[gtest] |
616 | | - fn proto_str_debug() { |
617 | | - assert_eq!(&format!("{:?}", test_proto_str(b"Hello There")), "\"Hello There\""); |
618 | | - assert_eq!( |
619 | | - &format!( |
620 | | - "{:?}", |
621 | | - test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa"), |
622 | | - ), |
623 | | - "\"Hello\\xC0\\x80 There\\xE6\\x83 Goodbye\\u{10d4ea}\"", |
624 | | - ); |
625 | | - } |
626 | | - |
627 | | - #[gtest] |
628 | | - fn proto_str_display() { |
629 | | - assert_eq!(&test_proto_str(b"Hello There").to_string(), "Hello There"); |
630 | | - assert_eq!( |
631 | | - &test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa").to_string(), |
632 | | - "Hello�� There� Goodbye\u{10d4ea}", |
633 | | - ); |
634 | | - } |
635 | | - |
636 | | - #[gtest] |
637 | | - fn proto_str_to_rust_str() { |
638 | | - assert_eq!(test_proto_str(b"hello").to_str(), Ok("hello")); |
639 | | - assert_eq!(test_proto_str("ศไทย中华Việt Nam".as_bytes()).to_str(), Ok("ศไทย中华Việt Nam")); |
640 | | - for expect_fail in [ |
641 | | - &b"Hello\xC2 There\xFF Goodbye"[..], |
642 | | - b"Hello\xC0\x80 There\xE6\x83 Goodbye", |
643 | | - b"\xF5foo\xF5\x80bar", |
644 | | - b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", |
645 | | - b"\xF4foo\xF4\x80bar\xF4\xBFbaz", |
646 | | - b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar", |
647 | | - b"\xED\xA0\x80foo\xED\xBF\xBFbar", |
648 | | - ] { |
649 | | - assert!( |
650 | | - matches!(test_proto_str(expect_fail).to_str(), Err(Utf8Error { inner: _ })), |
651 | | - "{expect_fail:?}" |
652 | | - ); |
653 | | - } |
654 | | - } |
655 | | - |
656 | | - #[gtest] |
657 | | - fn proto_str_to_cow() { |
658 | | - assert_eq!(test_proto_str(b"hello").to_cow_lossy(), Cow::Borrowed("hello")); |
659 | | - assert_eq!( |
660 | | - test_proto_str("ศไทย中华Việt Nam".as_bytes()).to_cow_lossy(), |
661 | | - Cow::Borrowed("ศไทย中华Việt Nam") |
662 | | - ); |
663 | | - for (bytes, lossy_str) in [ |
664 | | - (&b"Hello\xC2 There\xFF Goodbye"[..], "Hello� There� Goodbye"), |
665 | | - (b"Hello\xC0\x80 There\xE6\x83 Goodbye", "Hello�� There� Goodbye"), |
666 | | - (b"\xF5foo\xF5\x80bar", "�foo��bar"), |
667 | | - (b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", "�foo�bar�baz"), |
668 | | - (b"\xF4foo\xF4\x80bar\xF4\xBFbaz", "�foo�bar��baz"), |
669 | | - (b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar", "����foo\u{10000}bar"), |
670 | | - (b"\xED\xA0\x80foo\xED\xBF\xBFbar", "���foo���bar"), |
671 | | - ] { |
672 | | - let cow = test_proto_str(bytes).to_cow_lossy(); |
673 | | - assert!(matches!(cow, Cow::Owned(_))); |
674 | | - assert_eq!(&*cow, lossy_str, "{bytes:?}"); |
675 | | - } |
676 | | - } |
677 | | - |
678 | | - #[gtest] |
679 | | - fn proto_str_utf8_chunks() { |
680 | | - macro_rules! assert_chunks { |
681 | | - ($bytes:expr, $($chunks:expr),* $(,)?) => { |
682 | | - let bytes = $bytes; |
683 | | - let chunks: &[std::result::Result<&str, &[u8]>] = &[$($chunks),*]; |
684 | | - let s = test_proto_str(bytes); |
685 | | - let mut got_chunks = s.utf8_chunks(); |
686 | | - let mut expected_chars = chunks.iter().copied(); |
687 | | - assert!(got_chunks.eq(expected_chars), "{bytes:?} -> {chunks:?}"); |
688 | | - }; |
689 | | - } |
690 | | - assert_chunks!(b"hello", Ok("hello")); |
691 | | - assert_chunks!("ศไทย中华Việt Nam".as_bytes(), Ok("ศไทย中华Việt Nam")); |
692 | | - assert_chunks!( |
693 | | - b"Hello\xC2 There\xFF Goodbye", |
694 | | - Ok("Hello"), |
695 | | - Err(b"\xC2"), |
696 | | - Ok(" There"), |
697 | | - Err(b"\xFF"), |
698 | | - Ok(" Goodbye"), |
699 | | - ); |
700 | | - assert_chunks!( |
701 | | - b"Hello\xC0\x80 There\xE6\x83 Goodbye", |
702 | | - Ok("Hello"), |
703 | | - Err(b"\xC0"), |
704 | | - Err(b"\x80"), |
705 | | - Ok(" There"), |
706 | | - Err(b"\xE6\x83"), |
707 | | - Ok(" Goodbye"), |
708 | | - ); |
709 | | - assert_chunks!( |
710 | | - b"\xF5foo\xF5\x80bar", |
711 | | - Err(b"\xF5"), |
712 | | - Ok("foo"), |
713 | | - Err(b"\xF5"), |
714 | | - Err(b"\x80"), |
715 | | - Ok("bar"), |
716 | | - ); |
717 | | - assert_chunks!( |
718 | | - b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", |
719 | | - Err(b"\xF1"), |
720 | | - Ok("foo"), |
721 | | - Err(b"\xF1\x80"), |
722 | | - Ok("bar"), |
723 | | - Err(b"\xF1\x80\x80"), |
724 | | - Ok("baz"), |
725 | | - ); |
726 | | - assert_chunks!( |
727 | | - b"\xF4foo\xF4\x80bar\xF4\xBFbaz", |
728 | | - Err(b"\xF4"), |
729 | | - Ok("foo"), |
730 | | - Err(b"\xF4\x80"), |
731 | | - Ok("bar"), |
732 | | - Err(b"\xF4"), |
733 | | - Err(b"\xBF"), |
734 | | - Ok("baz"), |
735 | | - ); |
736 | | - assert_chunks!( |
737 | | - b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar", |
738 | | - Err(b"\xF0"), |
739 | | - Err(b"\x80"), |
740 | | - Err(b"\x80"), |
741 | | - Err(b"\x80"), |
742 | | - Ok("foo\u{10000}bar"), |
743 | | - ); |
744 | | - assert_chunks!( |
745 | | - b"\xED\xA0\x80foo\xED\xBF\xBFbar", |
746 | | - Err(b"\xED"), |
747 | | - Err(b"\xA0"), |
748 | | - Err(b"\x80"), |
749 | | - Ok("foo"), |
750 | | - Err(b"\xED"), |
751 | | - Err(b"\xBF"), |
752 | | - Err(b"\xBF"), |
753 | | - Ok("bar"), |
754 | | - ); |
755 | | - } |
756 | | - |
757 | | - #[gtest] |
758 | | - fn proto_str_chars() { |
759 | | - macro_rules! assert_chars { |
760 | | - ($bytes:expr, $chars:expr) => { |
761 | | - let bytes = $bytes; |
762 | | - let chars = $chars; |
763 | | - let s = test_proto_str(bytes); |
764 | | - let mut got_chars = s.chars(); |
765 | | - let mut expected_chars = chars.into_iter(); |
766 | | - assert!(got_chars.eq(expected_chars), "{bytes:?} -> {chars:?}"); |
767 | | - }; |
768 | | - } |
769 | | - assert_chars!(b"hello", ['h', 'e', 'l', 'l', 'o']); |
770 | | - assert_chars!( |
771 | | - "ศไทย中华Việt Nam".as_bytes(), |
772 | | - ['ศ', 'ไ', 'ท', 'ย', '中', '华', 'V', 'i', 'ệ', 't', ' ', 'N', 'a', 'm'] |
773 | | - ); |
774 | | - assert_chars!( |
775 | | - b"Hello\xC2 There\xFF Goodbye", |
776 | | - [ |
777 | | - 'H', 'e', 'l', 'l', 'o', '�', ' ', 'T', 'h', 'e', 'r', 'e', '�', ' ', 'G', 'o', |
778 | | - 'o', 'd', 'b', 'y', 'e' |
779 | | - ] |
780 | | - ); |
781 | | - assert_chars!( |
782 | | - b"Hello\xC0\x80 There\xE6\x83 Goodbye", |
783 | | - [ |
784 | | - 'H', 'e', 'l', 'l', 'o', '�', '�', ' ', 'T', 'h', 'e', 'r', 'e', '�', ' ', 'G', |
785 | | - 'o', 'o', 'd', 'b', 'y', 'e' |
786 | | - ] |
787 | | - ); |
788 | | - assert_chars!(b"\xF5foo\xF5\x80bar", ['�', 'f', 'o', 'o', '�', '�', 'b', 'a', 'r']); |
789 | | - assert_chars!( |
790 | | - b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", |
791 | | - ['�', 'f', 'o', 'o', '�', 'b', 'a', 'r', '�', 'b', 'a', 'z'] |
792 | | - ); |
793 | | - assert_chars!( |
794 | | - b"\xF4foo\xF4\x80bar\xF4\xBFbaz", |
795 | | - ['�', 'f', 'o', 'o', '�', 'b', 'a', 'r', '�', '�', 'b', 'a', 'z'] |
796 | | - ); |
797 | | - assert_chars!( |
798 | | - b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar", |
799 | | - ['�', '�', '�', '�', 'f', 'o', 'o', '\u{10000}', 'b', 'a', 'r'] |
800 | | - ); |
801 | | - assert_chars!( |
802 | | - b"\xED\xA0\x80foo\xED\xBF\xBFbar", |
803 | | - ['�', '�', '�', 'f', 'o', 'o', '�', '�', '�', 'b', 'a', 'r'] |
804 | | - ); |
805 | | - } |
806 | 576 | } |
0 commit comments