11use core:: fmt;
2+ use core:: str:: Utf8Error ;
23use std:: borrow:: { Borrow , Cow } ;
34use std:: cmp:: Ordering ;
45use std:: fmt:: Display ;
@@ -44,9 +45,18 @@ impl<'a> From<&'a str> for CheetahString {
4445 }
4546}
4647
48+ /// # Safety Warning
49+ ///
50+ /// This implementation uses `unsafe` code and may cause undefined behavior
51+ /// if the bytes are not valid UTF-8. Consider using `CheetahString::try_from_bytes()`
52+ /// for safe UTF-8 validation.
53+ ///
54+ /// This implementation will be deprecated in a future version.
4755impl From < & [ u8 ] > for CheetahString {
4856 #[ inline]
4957 fn from ( b : & [ u8 ] ) -> Self {
58+ // SAFETY: This is unsafe and may cause UB if bytes are not valid UTF-8.
59+ // This will be deprecated in favor of try_from_bytes in the next version.
5060 CheetahString :: from_slice ( unsafe { std:: str:: from_utf8_unchecked ( b) } )
5161 }
5262}
@@ -59,9 +69,18 @@ impl FromStr for CheetahString {
5969 }
6070}
6171
72+ /// # Safety Warning
73+ ///
74+ /// This implementation uses `unsafe` code and may cause undefined behavior
75+ /// if the bytes are not valid UTF-8. Consider using `CheetahString::try_from_vec()`
76+ /// for safe UTF-8 validation.
77+ ///
78+ /// This implementation will be deprecated in a future version.
6279impl From < Vec < u8 > > for CheetahString {
6380 #[ inline]
6481 fn from ( v : Vec < u8 > ) -> Self {
82+ // SAFETY: This is unsafe and may cause UB if bytes are not valid UTF-8.
83+ // This will be deprecated in favor of try_from_vec in the next version.
6584 CheetahString :: from_slice ( unsafe { std:: str:: from_utf8_unchecked ( & v) } )
6685 }
6786}
@@ -164,11 +183,17 @@ impl From<CheetahString> for String {
164183 } => s. to_string ( ) ,
165184 CheetahString {
166185 inner : InnerString :: ArcVecString ( s) ,
167- } => unsafe { String :: from_utf8_unchecked ( s. to_vec ( ) ) } ,
186+ } => {
187+ // SAFETY: ArcVecString should only be created from valid UTF-8 sources
188+ unsafe { String :: from_utf8_unchecked ( s. to_vec ( ) ) }
189+ }
168190 #[ cfg( feature = "bytes" ) ]
169191 CheetahString {
170192 inner : InnerString :: Bytes ( b) ,
171- } => unsafe { String :: from_utf8_unchecked ( b. to_vec ( ) ) } ,
193+ } => {
194+ // SAFETY: Bytes variant should only be created from valid UTF-8 sources
195+ unsafe { String :: from_utf8_unchecked ( b. to_vec ( ) ) }
196+ }
172197 CheetahString {
173198 inner : InnerString :: Empty ,
174199 } => String :: new ( ) ,
@@ -240,6 +265,55 @@ impl CheetahString {
240265 }
241266 }
242267
268+ /// Creates a `CheetahString` from a byte vector with UTF-8 validation.
269+ ///
270+ /// # Errors
271+ ///
272+ /// Returns an error if the bytes are not valid UTF-8.
273+ ///
274+ /// # Examples
275+ ///
276+ /// ```
277+ /// use cheetah_string::CheetahString;
278+ ///
279+ /// let bytes = vec![104, 101, 108, 108, 111]; // "hello"
280+ /// let s = CheetahString::try_from_vec(bytes).unwrap();
281+ /// assert_eq!(s, "hello");
282+ ///
283+ /// let invalid = vec![0xFF, 0xFE];
284+ /// assert!(CheetahString::try_from_vec(invalid).is_err());
285+ /// ```
286+ pub fn try_from_vec ( v : Vec < u8 > ) -> Result < Self , Utf8Error > {
287+ // Validate UTF-8
288+ std:: str:: from_utf8 ( & v) ?;
289+ Ok ( CheetahString {
290+ inner : InnerString :: ArcVecString ( Arc :: new ( v) ) ,
291+ } )
292+ }
293+
294+ /// Creates a `CheetahString` from a byte slice with UTF-8 validation.
295+ ///
296+ /// # Errors
297+ ///
298+ /// Returns an error if the bytes are not valid UTF-8.
299+ ///
300+ /// # Examples
301+ ///
302+ /// ```
303+ /// use cheetah_string::CheetahString;
304+ ///
305+ /// let bytes = b"hello";
306+ /// let s = CheetahString::try_from_bytes(bytes).unwrap();
307+ /// assert_eq!(s, "hello");
308+ ///
309+ /// let invalid = &[0xFF, 0xFE];
310+ /// assert!(CheetahString::try_from_bytes(invalid).is_err());
311+ /// ```
312+ pub fn try_from_bytes ( b : & [ u8 ] ) -> Result < Self , Utf8Error > {
313+ let s = std:: str:: from_utf8 ( b) ?;
314+ Ok ( CheetahString :: from_slice ( s) )
315+ }
316+
243317 #[ inline]
244318 pub fn from_arc_vec ( s : Arc < Vec < u8 > > ) -> Self {
245319 CheetahString {
@@ -280,9 +354,17 @@ impl CheetahString {
280354 match & self . inner {
281355 InnerString :: ArcString ( s) => s. as_str ( ) ,
282356 InnerString :: StaticStr ( s) => s,
283- InnerString :: ArcVecString ( s) => std:: str:: from_utf8 ( s. as_ref ( ) ) . unwrap ( ) ,
357+ InnerString :: ArcVecString ( s) => {
358+ // SAFETY: ArcVecString is only created from validated UTF-8 sources.
359+ // All constructors ensure this invariant is maintained.
360+ unsafe { std:: str:: from_utf8_unchecked ( s. as_ref ( ) ) }
361+ }
284362 #[ cfg( feature = "bytes" ) ]
285- InnerString :: Bytes ( b) => std:: str:: from_utf8 ( b. as_ref ( ) ) . unwrap ( ) ,
363+ InnerString :: Bytes ( b) => {
364+ // SAFETY: Bytes variant is only created from validated UTF-8 sources.
365+ // The from_bytes constructor ensures this invariant.
366+ unsafe { std:: str:: from_utf8_unchecked ( b. as_ref ( ) ) }
367+ }
286368 InnerString :: Empty => EMPTY_STRING ,
287369 }
288370 }
0 commit comments