@@ -8,8 +8,6 @@ use std::ops::Deref;
88use std:: str:: FromStr ;
99use std:: sync:: Arc ;
1010
11- pub const EMPTY_STRING : & str = "" ;
12-
1311#[ derive( Clone ) ]
1412#[ repr( transparent) ]
1513pub struct CheetahString {
@@ -19,7 +17,10 @@ pub struct CheetahString {
1917impl Default for CheetahString {
2018 fn default ( ) -> Self {
2119 CheetahString {
22- inner : InnerString :: Empty ,
20+ inner : InnerString :: Inline {
21+ len : 0 ,
22+ data : [ 0 ; INLINE_CAPACITY ] ,
23+ } ,
2324 }
2425 }
2526}
@@ -176,11 +177,17 @@ impl From<CheetahString> for String {
176177 fn from ( s : CheetahString ) -> Self {
177178 match s {
178179 CheetahString {
179- inner : InnerString :: ArcString ( s) ,
180- } => s. as_ref ( ) . clone ( ) ,
180+ inner : InnerString :: Inline { len, data } ,
181+ } => {
182+ // SAFETY: Inline strings are always valid UTF-8
183+ unsafe { String :: from_utf8_unchecked ( data[ ..len as usize ] . to_vec ( ) ) }
184+ }
181185 CheetahString {
182186 inner : InnerString :: StaticStr ( s) ,
183187 } => s. to_string ( ) ,
188+ CheetahString {
189+ inner : InnerString :: ArcString ( s) ,
190+ } => s. as_ref ( ) . clone ( ) ,
184191 CheetahString {
185192 inner : InnerString :: ArcVecString ( s) ,
186193 } => {
@@ -194,9 +201,6 @@ impl From<CheetahString> for String {
194201 // SAFETY: Bytes variant should only be created from valid UTF-8 sources
195202 unsafe { String :: from_utf8_unchecked ( b. to_vec ( ) ) }
196203 }
197- CheetahString {
198- inner : InnerString :: Empty ,
199- } => String :: new ( ) ,
200204 }
201205 }
202206}
@@ -242,7 +246,10 @@ impl CheetahString {
242246 #[ inline]
243247 pub const fn empty ( ) -> Self {
244248 CheetahString {
245- inner : InnerString :: Empty ,
249+ inner : InnerString :: Inline {
250+ len : 0 ,
251+ data : [ 0 ; INLINE_CAPACITY ] ,
252+ } ,
246253 }
247254 }
248255
@@ -323,15 +330,41 @@ impl CheetahString {
323330
324331 #[ inline]
325332 pub fn from_slice ( s : & str ) -> Self {
326- CheetahString {
327- inner : InnerString :: ArcString ( Arc :: new ( s. to_owned ( ) ) ) ,
333+ if s. len ( ) <= INLINE_CAPACITY {
334+ // Use inline storage for short strings
335+ let mut data = [ 0u8 ; INLINE_CAPACITY ] ;
336+ data[ ..s. len ( ) ] . copy_from_slice ( s. as_bytes ( ) ) ;
337+ CheetahString {
338+ inner : InnerString :: Inline {
339+ len : s. len ( ) as u8 ,
340+ data,
341+ } ,
342+ }
343+ } else {
344+ // Use Arc for long strings
345+ CheetahString {
346+ inner : InnerString :: ArcString ( Arc :: new ( s. to_owned ( ) ) ) ,
347+ }
328348 }
329349 }
330350
331351 #[ inline]
332352 pub fn from_string ( s : String ) -> Self {
333- CheetahString {
334- inner : InnerString :: ArcString ( Arc :: new ( s) ) ,
353+ if s. len ( ) <= INLINE_CAPACITY {
354+ // Use inline storage for short strings
355+ let mut data = [ 0u8 ; INLINE_CAPACITY ] ;
356+ data[ ..s. len ( ) ] . copy_from_slice ( s. as_bytes ( ) ) ;
357+ CheetahString {
358+ inner : InnerString :: Inline {
359+ len : s. len ( ) as u8 ,
360+ data,
361+ } ,
362+ }
363+ } else {
364+ // Use Arc for long strings
365+ CheetahString {
366+ inner : InnerString :: ArcString ( Arc :: new ( s) ) ,
367+ }
335368 }
336369 }
337370 #[ inline]
@@ -352,8 +385,13 @@ impl CheetahString {
352385 #[ inline]
353386 pub fn as_str ( & self ) -> & str {
354387 match & self . inner {
355- InnerString :: ArcString ( s) => s. as_str ( ) ,
388+ InnerString :: Inline { len, data } => {
389+ // SAFETY: Inline strings are only created from valid UTF-8 sources.
390+ // The data is always valid UTF-8 up to len bytes.
391+ unsafe { std:: str:: from_utf8_unchecked ( & data[ ..* len as usize ] ) }
392+ }
356393 InnerString :: StaticStr ( s) => s,
394+ InnerString :: ArcString ( s) => s. as_str ( ) ,
357395 InnerString :: ArcVecString ( s) => {
358396 // SAFETY: ArcVecString is only created from validated UTF-8 sources.
359397 // All constructors ensure this invariant is maintained.
@@ -365,43 +403,42 @@ impl CheetahString {
365403 // The from_bytes constructor ensures this invariant.
366404 unsafe { std:: str:: from_utf8_unchecked ( b. as_ref ( ) ) }
367405 }
368- InnerString :: Empty => EMPTY_STRING ,
369406 }
370407 }
371408
372409 #[ inline]
373410 pub fn as_bytes ( & self ) -> & [ u8 ] {
374411 match & self . inner {
375- InnerString :: ArcString ( s ) => s . as_bytes ( ) ,
412+ InnerString :: Inline { len , data } => & data [ .. * len as usize ] ,
376413 InnerString :: StaticStr ( s) => s. as_bytes ( ) ,
414+ InnerString :: ArcString ( s) => s. as_bytes ( ) ,
377415 InnerString :: ArcVecString ( s) => s. as_ref ( ) ,
378416 #[ cfg( feature = "bytes" ) ]
379417 InnerString :: Bytes ( b) => b. as_ref ( ) ,
380- InnerString :: Empty => & [ ] ,
381418 }
382419 }
383420
384421 #[ inline]
385422 pub fn len ( & self ) -> usize {
386423 match & self . inner {
387- InnerString :: ArcString ( s ) => s . len ( ) ,
424+ InnerString :: Inline { len , .. } => * len as usize ,
388425 InnerString :: StaticStr ( s) => s. len ( ) ,
426+ InnerString :: ArcString ( s) => s. len ( ) ,
389427 InnerString :: ArcVecString ( s) => s. len ( ) ,
390428 #[ cfg( feature = "bytes" ) ]
391429 InnerString :: Bytes ( b) => b. len ( ) ,
392- InnerString :: Empty => 0 ,
393430 }
394431 }
395432
396433 #[ inline]
397434 pub fn is_empty ( & self ) -> bool {
398435 match & self . inner {
399- InnerString :: ArcString ( s ) => s . is_empty ( ) ,
436+ InnerString :: Inline { len , .. } => * len == 0 ,
400437 InnerString :: StaticStr ( s) => s. is_empty ( ) ,
438+ InnerString :: ArcString ( s) => s. is_empty ( ) ,
401439 InnerString :: ArcVecString ( s) => s. is_empty ( ) ,
402440 #[ cfg( feature = "bytes" ) ]
403441 InnerString :: Bytes ( b) => b. is_empty ( ) ,
404- InnerString :: Empty => true ,
405442 }
406443 }
407444}
@@ -506,20 +543,35 @@ impl Borrow<str> for CheetahString {
506543 }
507544}
508545
/// Maximum number of bytes a string may have to be stored inline.
///
/// Together with the one-byte length field, the payload of the inline variant
/// occupies 23 + 1 = 24 bytes.
const INLINE_CAPACITY: usize = 23;
548+
509549/// The `InnerString` enum represents different types of string storage.
510550///
551+ /// This enum uses Small String Optimization (SSO) to avoid heap allocations for short strings.
552+ ///
511553/// Variants:
512554///
513- /// * `ArcString(Arc<String>)` - A reference-counted string.
514- /// * `StaticStr(&'static str)` - A static string slice.
555+ /// * `Inline` - Inline storage for strings <= 23 bytes (zero heap allocations).
556+ /// * `StaticStr(&'static str)` - A static string slice (zero heap allocations).
557+ /// * `ArcString(Arc<String>)` - A reference-counted string (one heap allocation).
558+ /// * `ArcVecString(Arc<Vec<u8>>)` - A reference-counted byte vector.
515559/// * `Bytes(bytes::Bytes)` - A byte buffer (available when the "bytes" feature is enabled).
516- /// * `Empty` - An empty string.
517560#[ derive( Clone ) ]
518561pub ( super ) enum InnerString {
519- ArcString ( Arc < String > ) ,
562+ /// Inline storage for short strings (up to 23 bytes).
563+ /// Stores the length and data directly without heap allocation.
564+ Inline {
565+ len : u8 ,
566+ data : [ u8 ; INLINE_CAPACITY ] ,
567+ } ,
568+ /// Static string slice with 'static lifetime.
520569 StaticStr ( & ' static str ) ,
570+ /// Reference-counted heap-allocated string.
571+ ArcString ( Arc < String > ) ,
572+ /// Reference-counted heap-allocated byte vector.
521573 ArcVecString ( Arc < Vec < u8 > > ) ,
574+ /// Bytes type integration (requires "bytes" feature).
522575 #[ cfg( feature = "bytes" ) ]
523576 Bytes ( bytes:: Bytes ) ,
524- Empty ,
525577}
0 commit comments