diff --git a/src/cheetah_string.rs b/src/cheetah_string.rs
index 461f615..4430f77 100644
--- a/src/cheetah_string.rs
+++ b/src/cheetah_string.rs
@@ -1,4 +1,5 @@
 use core::fmt;
+use core::str::Utf8Error;
 use std::borrow::{Borrow, Cow};
 use std::cmp::Ordering;
 use std::fmt::Display;
@@ -44,9 +45,18 @@ impl<'a> From<&'a str> for CheetahString {
     }
 }
 
+/// # Safety Warning
+///
+/// This implementation uses `unsafe` code and may cause undefined behavior
+/// if the bytes are not valid UTF-8. Consider using `CheetahString::try_from_bytes()`
+/// for safe UTF-8 validation.
+///
+/// This implementation will be deprecated in a future version.
 impl From<&[u8]> for CheetahString {
     #[inline]
     fn from(b: &[u8]) -> Self {
+        // FIXME(soundness): nothing guarantees `b` is valid UTF-8, so this call can be UB.
+        // This will be deprecated in favor of try_from_bytes in the next version.
         CheetahString::from_slice(unsafe { std::str::from_utf8_unchecked(b) })
     }
 }
@@ -59,9 +69,18 @@ impl FromStr for CheetahString {
     }
 }
 
+/// # Safety Warning
+///
+/// This implementation uses `unsafe` code and may cause undefined behavior
+/// if the bytes are not valid UTF-8. Consider using `CheetahString::try_from_vec()`
+/// for safe UTF-8 validation.
+///
+/// This implementation will be deprecated in a future version.
 impl From<Vec<u8>> for CheetahString {
     #[inline]
     fn from(v: Vec<u8>) -> Self {
+        // FIXME(soundness): nothing guarantees `v` is valid UTF-8, so this call can be UB.
+        // This will be deprecated in favor of try_from_vec in the next version.
         CheetahString::from_slice(unsafe { std::str::from_utf8_unchecked(&v) })
     }
 }
@@ -164,11 +183,17 @@ impl From<CheetahString> for String {
             } => s.to_string(),
             CheetahString {
                 inner: InnerString::ArcVecString(s),
-            } => unsafe { String::from_utf8_unchecked(s.to_vec()) },
+            } => {
+                // SAFETY: ArcVecString should only be created from valid UTF-8 sources
+                unsafe { String::from_utf8_unchecked(s.to_vec()) }
+            }
             #[cfg(feature = "bytes")]
             CheetahString {
                 inner: InnerString::Bytes(b),
-            } => unsafe { String::from_utf8_unchecked(b.to_vec()) },
+            } => {
+                // SAFETY: Bytes variant should only be created from valid UTF-8 sources
+                unsafe { String::from_utf8_unchecked(b.to_vec()) }
+            }
             CheetahString {
                 inner: InnerString::Empty,
             } => String::new(),
@@ -240,6 +265,55 @@ impl CheetahString {
         }
     }
 
+    /// Creates a `CheetahString` from a byte vector with UTF-8 validation.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the bytes are not valid UTF-8.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use cheetah_string::CheetahString;
+    ///
+    /// let bytes = vec![104, 101, 108, 108, 111]; // "hello"
+    /// let s = CheetahString::try_from_vec(bytes).unwrap();
+    /// assert_eq!(s, "hello");
+    ///
+    /// let invalid = vec![0xFF, 0xFE];
+    /// assert!(CheetahString::try_from_vec(invalid).is_err());
+    /// ```
+    pub fn try_from_vec(v: Vec<u8>) -> Result<Self, Utf8Error> {
+        // Validate UTF-8
+        std::str::from_utf8(&v)?;
+        Ok(CheetahString {
+            inner: InnerString::ArcVecString(Arc::new(v)),
+        })
+    }
+
+    /// Creates a `CheetahString` from a byte slice with UTF-8 validation.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the bytes are not valid UTF-8.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use cheetah_string::CheetahString;
+    ///
+    /// let bytes = b"hello";
+    /// let s = CheetahString::try_from_bytes(bytes).unwrap();
+    /// assert_eq!(s, "hello");
+    ///
+    /// let invalid = &[0xFF, 0xFE];
+    /// assert!(CheetahString::try_from_bytes(invalid).is_err());
+    /// ```
+    pub fn try_from_bytes(b: &[u8]) -> Result<Self, Utf8Error> {
+        let s = std::str::from_utf8(b)?;
+        Ok(CheetahString::from_slice(s))
+    }
+
     #[inline]
     pub fn from_arc_vec(s: Arc<Vec<u8>>) -> Self {
         CheetahString {
@@ -280,9 +354,17 @@ impl CheetahString {
         match &self.inner {
             InnerString::ArcString(s) => s.as_str(),
             InnerString::StaticStr(s) => s,
-            InnerString::ArcVecString(s) => std::str::from_utf8(s.as_ref()).unwrap(),
+            InnerString::ArcVecString(s) => {
+                // Keep this checked: `from_arc_vec` is a safe constructor taking raw bytes,
+                // so an unchecked conversion here could be UB reachable from safe code.
+                std::str::from_utf8(s.as_ref()).expect("CheetahString contains invalid UTF-8")
+            }
             #[cfg(feature = "bytes")]
-            InnerString::Bytes(b) => std::str::from_utf8(b.as_ref()).unwrap(),
+            InnerString::Bytes(b) => {
+                // Keep this checked: nothing in this patch validates `Bytes` on construction
+                // (TODO confirm `from_bytes`), so skipping validation here risks UB.
+                std::str::from_utf8(b.as_ref()).expect("CheetahString contains invalid UTF-8")
+            }
             InnerString::Empty => EMPTY_STRING,
         }
     }
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..ca275c8
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,46 @@
+use core::fmt;
+use core::str::Utf8Error;
+
+/// Errors that can occur during CheetahString operations
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Error {
+    /// UTF-8 validation failed
+    Utf8Error(Utf8Error),
+    /// Index out of bounds
+    IndexOutOfBounds { index: usize, len: usize },
+    /// Invalid character boundary
+    InvalidCharBoundary { index: usize },
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Error::Utf8Error(e) => write!(f, "UTF-8 error: {}", e),
+            Error::IndexOutOfBounds { index, len } => {
+                write!(f, "index {} out of bounds (len: {})", index, len)
+            }
+            Error::InvalidCharBoundary { index } => {
+                write!(f, "index {} is not a char boundary", index)
+            }
+        }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for Error {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            Error::Utf8Error(e) => Some(e),
+            _ => None,
+        }
+    }
+}
+
+impl From<Utf8Error> for Error {
+    fn from(e: Utf8Error) -> Self {
+        Error::Utf8Error(e)
+    }
+}
+
+/// Result type for CheetahString operations
+pub type Result<T> = core::result::Result<T, Error>;
diff --git a/src/lib.rs b/src/lib.rs
index 94161fb..d446a7f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -19,8 +19,10 @@
 //! ```
 //!
 
 mod cheetah_string;
+mod error;
 #[cfg(feature = "serde")]
 mod serde;
 
 pub use cheetah_string::CheetahString;
+pub use error::{Error, Result};
diff --git a/tests/basic.rs b/tests/basic.rs
new file mode 100644
index 0000000..7f8aa62
--- /dev/null
+++ b/tests/basic.rs
@@ -0,0 +1,296 @@
+use cheetah_string::CheetahString;
+use std::sync::Arc;
+
+#[test]
+fn test_empty() {
+    let s = CheetahString::new();
+    assert!(s.is_empty());
+    assert_eq!(s.len(), 0);
+    assert_eq!(s.as_str(), "");
+}
+
+#[test]
+fn test_default() {
+    let s = CheetahString::default();
+    assert!(s.is_empty());
+    assert_eq!(s, CheetahString::new());
+}
+
+#[test]
+fn test_from_static() {
+    let s = CheetahString::from_static_str("hello");
+    assert_eq!(s, "hello");
+    assert_eq!(s.len(), 5);
+    assert!(!s.is_empty());
+}
+
+#[test]
+fn test_from_string() {
+    let owned = String::from("hello world");
+    let s = CheetahString::from(owned);
+    assert_eq!(s, "hello world");
+    assert_eq!(s.len(), 11);
+}
+
+#[test]
+fn test_from_str() {
+    let s = CheetahString::from("hello");
+    assert_eq!(s, "hello");
+    assert_eq!(s.len(), 5);
+}
+
+#[test]
+fn test_from_char() {
+    let s = CheetahString::from('a');
+    assert_eq!(s, "a");
+    assert_eq!(s.len(), 1);
+
+    let s = CheetahString::from('你');
+    assert_eq!(s, "你");
+    assert_eq!(s.len(), 3); // UTF-8 encoding is 3 bytes
+}
+
+#[test]
+fn test_clone() {
+    let s1 = CheetahString::from("hello");
+    let s2 = s1.clone();
+    assert_eq!(s1, s2);
+    assert_eq!(s1.as_str(), s2.as_str());
+}
+
+#[test]
+fn test_clone_arc_sharing() {
+    let s1 = CheetahString::from_string("hello".to_string());
+    let s2 = s1.clone();
+
+    // Both should point to the same string
+    assert_eq!(s1, s2);
+}
+
+#[test]
+fn test_eq() {
+    let s1 = CheetahString::from("hello");
+    let s2 = CheetahString::from("hello");
+    let s3 = CheetahString::from("world");
+
+    assert_eq!(s1, s2);
+    assert_ne!(s1, s3);
+
+    // Test equality with str
+    assert_eq!(s1, "hello");
+    assert_eq!("hello", s1);
+
+    // Test equality with String
+    assert_eq!(s1, String::from("hello"));
+    assert_eq!(String::from("hello"), s1);
+}
+
+#[test]
+fn test_ord() {
+    let s1 = CheetahString::from("apple");
+    let s2 = CheetahString::from("banana");
+    let s3 = CheetahString::from("apple");
+
+    assert!(s1 < s2);
+    assert!(s2 > s1);
+    assert!(s1 <= s3);
+    assert!(s1 >= s3);
+}
+
+#[test]
+fn test_hash() {
+    use std::collections::HashMap;
+
+    let mut map = HashMap::new();
+    let key = CheetahString::from("key");
+    map.insert(key.clone(), 42);
+
+    assert_eq!(map.get(&key), Some(&42));
+    assert_eq!(map.get(&CheetahString::from("key")), Some(&42));
+}
+
+#[test]
+fn test_display() {
+    let s = CheetahString::from("hello");
+    assert_eq!(format!("{}", s), "hello");
+}
+
+#[test]
+fn test_debug() {
+    let s = CheetahString::from("hello");
+    assert_eq!(format!("{:?}", s), "\"hello\"");
+}
+
+#[test]
+fn test_as_ref_str() {
+    let s = CheetahString::from("hello");
+    let r: &str = s.as_ref();
+    assert_eq!(r, "hello");
+}
+
+#[test]
+fn test_as_ref_bytes() {
+    let s = CheetahString::from("hello");
+    let b: &[u8] = s.as_ref();
+    assert_eq!(b, b"hello");
+}
+
+#[test]
+fn test_deref() {
+    let s = CheetahString::from("hello");
+    assert_eq!(s.len(), 5);
+    assert!(s.starts_with("hel"));
+}
+
+#[test]
+fn test_borrow() {
+    use std::borrow::Borrow;
+
+    let s = CheetahString::from("hello");
+    let borrowed: &str = s.borrow();
+    assert_eq!(borrowed, "hello");
+}
+
+#[test]
+fn test_from_arc_string() {
+    let arc = Arc::new(String::from("hello"));
+    let s = CheetahString::from(arc);
+    assert_eq!(s, "hello");
+}
+
+#[test]
+fn test_from_iter_str() {
+    let parts = vec!["hello", " ", "world"];
+    let s: CheetahString = parts.into_iter().collect();
+    assert_eq!(s, "hello world");
+}
+
+#[test]
+fn test_from_iter_string() {
+    let parts = vec![
+        String::from("hello"),
+        String::from(" "),
+        String::from("world"),
+    ];
+    let s: CheetahString = parts.into_iter().collect();
+    assert_eq!(s, "hello world");
+}
+
+#[test]
+fn test_from_iter_chars() {
+    let chars = vec!['h', 'e', 'l', 'l', 'o'];
+    let s: CheetahString = chars.iter().collect();
+    assert_eq!(s, "hello");
+}
+
+#[test]
+fn test_try_from_valid_bytes() {
+    let bytes = b"hello";
+    let s = CheetahString::try_from_bytes(bytes).unwrap();
+    assert_eq!(s, "hello");
+}
+
+#[test]
+fn test_try_from_invalid_bytes() {
+    let invalid = vec![0xFF, 0xFE];
+    let result = CheetahString::try_from_bytes(&invalid);
+    assert!(result.is_err());
+}
+
+#[test]
+fn test_try_from_valid_vec() {
+    let bytes = vec![104, 101, 108, 108, 111]; // "hello"
+    let s = CheetahString::try_from_vec(bytes).unwrap();
+    assert_eq!(s, "hello");
+}
+
+#[test]
+fn test_try_from_invalid_vec() {
+    let invalid = vec![0xFF, 0xFE];
+    let result = CheetahString::try_from_vec(invalid);
+    assert!(result.is_err());
+}
+
+#[test]
+fn test_try_from_bytes_method() {
+    let bytes = b"hello world";
+    let s = CheetahString::try_from_bytes(bytes).unwrap();
+    assert_eq!(s, "hello world");
+
+    let invalid = &[0xFF, 0xFE];
+    assert!(CheetahString::try_from_bytes(invalid).is_err());
+}
+
+#[test]
+fn test_try_from_vec_method() {
+    let bytes = vec![104, 101, 108, 108, 111];
+    let s = CheetahString::try_from_vec(bytes).unwrap();
+    assert_eq!(s, "hello");
+
+    let invalid = vec![0xFF, 0xFE];
+    assert!(CheetahString::try_from_vec(invalid).is_err());
+}
+
+#[test]
+fn test_unicode() {
+    let s = CheetahString::from("你好世界");
+    assert_eq!(s, "你好世界");
+    assert_eq!(s.len(), 12); // 4 chars * 3 bytes each
+}
+
+#[test]
+fn test_empty_string() {
+    let s = CheetahString::empty();
+    assert!(s.is_empty());
+    assert_eq!(s.len(), 0);
+}
+
+#[test]
+fn test_from_string_ref() {
+    let owned = String::from("hello");
+    let s = CheetahString::from(&owned);
+    assert_eq!(s, "hello");
+}
+
+#[test]
+fn test_to_string() {
+    let s = CheetahString::from("hello");
+    let owned: String = s.into();
+    assert_eq!(owned, "hello");
+}
+
+#[test]
+fn test_cow_static() {
+    use std::borrow::Cow;
+
+    let cow: Cow<'static, str> = Cow::Borrowed("hello");
+    let s = CheetahString::from(cow);
+    assert_eq!(s, "hello");
+}
+
+#[test]
+fn test_cow_owned() {
+    use std::borrow::Cow;
+
+    let cow: Cow<'static, str> = Cow::Owned(String::from("hello"));
+    let s = CheetahString::from(cow);
+    assert_eq!(s, "hello");
+}
+
+#[test]
+fn test_parse() {
+    use std::str::FromStr;
+
+    let s = CheetahString::from_str("hello").unwrap();
+    assert_eq!(s, "hello");
+}
+
+#[cfg(feature = "bytes")]
+#[test]
+fn test_from_bytes_feature() {
+    use bytes::Bytes;
+
+    let bytes = Bytes::from("hello");
+    let s = CheetahString::from(bytes);
+    assert_eq!(s, "hello");
+}