Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 78 additions & 26 deletions src/cheetah_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ use std::ops::Deref;
use std::str::FromStr;
use std::sync::Arc;

pub const EMPTY_STRING: &str = "";

#[derive(Clone)]
#[repr(transparent)]
pub struct CheetahString {
Expand All @@ -19,7 +17,10 @@ pub struct CheetahString {
// `Default` for `CheetahString`: produces an empty string value.
//
// NOTE(review): this listing is a diff hunk (-19,7 +17,10) with removed and added lines
// shown together. `inner: InnerString::Empty` looks like the removed initializer and the
// zero-length `InnerString::Inline { .. }` the added one — confirm against the merged file.
impl Default for CheetahString {
fn default() -> Self {
CheetahString {
inner: InnerString::Empty,
inner: InnerString::Inline {
// No bytes in use; the buffer is zero-filled.
len: 0,
data: [0; INLINE_CAPACITY],
},
}
}
}
Expand Down Expand Up @@ -176,11 +177,17 @@ impl From<CheetahString> for String {
fn from(s: CheetahString) -> Self {
match s {
CheetahString {
inner: InnerString::ArcString(s),
} => s.as_ref().clone(),
inner: InnerString::Inline { len, data },
} => {
// SAFETY: Inline strings are always valid UTF-8
unsafe { String::from_utf8_unchecked(data[..len as usize].to_vec()) }
}
CheetahString {
inner: InnerString::StaticStr(s),
} => s.to_string(),
CheetahString {
inner: InnerString::ArcString(s),
} => s.as_ref().clone(),
CheetahString {
inner: InnerString::ArcVecString(s),
} => {
Expand All @@ -194,9 +201,6 @@ impl From<CheetahString> for String {
// SAFETY: Bytes variant should only be created from valid UTF-8 sources
unsafe { String::from_utf8_unchecked(b.to_vec()) }
}
CheetahString {
inner: InnerString::Empty,
} => String::new(),
}
}
}
Expand Down Expand Up @@ -242,7 +246,10 @@ impl CheetahString {
/// Creates an empty `CheetahString`.
///
/// Declared `const fn`, so it can be evaluated in constant contexts.
//
// NOTE(review): this listing is a diff hunk (-242,7 +246,10) with removed and added lines
// shown together. `inner: InnerString::Empty` looks like the removed initializer and the
// zero-length `InnerString::Inline { .. }` the added one — confirm against the merged file.
#[inline]
pub const fn empty() -> Self {
CheetahString {
inner: InnerString::Empty,
inner: InnerString::Inline {
// No bytes in use; the buffer is zero-filled.
len: 0,
data: [0; INLINE_CAPACITY],
},
}
}

Expand Down Expand Up @@ -323,15 +330,41 @@ impl CheetahString {

/// Builds a `CheetahString` from a borrowed string slice by copying its contents.
///
/// * If `s.len() <= INLINE_CAPACITY`, the bytes are copied into a zero-filled inline
///   buffer and stored as `InnerString::Inline`.
/// * Otherwise `s` is cloned into an owned `String` and wrapped in `InnerString::ArcString`.
//
// NOTE(review): this listing is a diff hunk (-323,15 +330,41) with removed and added lines
// shown together. The two lines directly below the signature (`CheetahString {` and the
// unconditional `ArcString` initializer) look like the removed pre-change body, which is
// why the braces in this span do not balance — confirm against the merged file.
#[inline]
pub fn from_slice(s: &str) -> Self {
CheetahString {
inner: InnerString::ArcString(Arc::new(s.to_owned())),
if s.len() <= INLINE_CAPACITY {
// Use inline storage for short strings
let mut data = [0u8; INLINE_CAPACITY];
data[..s.len()].copy_from_slice(s.as_bytes());
CheetahString {
inner: InnerString::Inline {
// The cast cannot truncate: this branch is guarded by
// `s.len() <= INLINE_CAPACITY`, and INLINE_CAPACITY is 23.
len: s.len() as u8,
data,
},
}
} else {
// Use Arc for long strings
CheetahString {
inner: InnerString::ArcString(Arc::new(s.to_owned())),
}
}
}

/// Builds a `CheetahString` from an owned `String`.
///
/// * If `s.len() <= INLINE_CAPACITY`, the bytes are copied into a zero-filled inline
///   buffer and stored as `InnerString::Inline`.
/// * Otherwise `s` is moved into an `Arc` and stored as `InnerString::ArcString`.
//
// NOTE(review): this listing is part of a diff hunk (-323,15 +330,41) with removed and
// added lines shown together. The two lines directly below the signature (`CheetahString {`
// and the unconditional `ArcString` initializer) look like the removed pre-change body,
// which is why the braces in this span do not balance — confirm against the merged file.
#[inline]
pub fn from_string(s: String) -> Self {
CheetahString {
inner: InnerString::ArcString(Arc::new(s)),
if s.len() <= INLINE_CAPACITY {
// Use inline storage for short strings
let mut data = [0u8; INLINE_CAPACITY];
data[..s.len()].copy_from_slice(s.as_bytes());
CheetahString {
inner: InnerString::Inline {
// The cast cannot truncate: this branch is guarded by
// `s.len() <= INLINE_CAPACITY`, and INLINE_CAPACITY is 23.
len: s.len() as u8,
data,
},
}
} else {
// Use Arc for long strings
CheetahString {
inner: InnerString::ArcString(Arc::new(s)),
}
}
}
#[inline]
Expand All @@ -352,8 +385,13 @@ impl CheetahString {
#[inline]
pub fn as_str(&self) -> &str {
match &self.inner {
InnerString::ArcString(s) => s.as_str(),
InnerString::Inline { len, data } => {
// SAFETY: Inline strings are only created from valid UTF-8 sources.
// The data is always valid UTF-8 up to len bytes.
unsafe { std::str::from_utf8_unchecked(&data[..*len as usize]) }
}
InnerString::StaticStr(s) => s,
InnerString::ArcString(s) => s.as_str(),
InnerString::ArcVecString(s) => {
// SAFETY: ArcVecString is only created from validated UTF-8 sources.
// All constructors ensure this invariant is maintained.
Expand All @@ -365,43 +403,42 @@ impl CheetahString {
// The from_bytes constructor ensures this invariant.
unsafe { std::str::from_utf8_unchecked(b.as_ref()) }
}
InnerString::Empty => EMPTY_STRING,
}
}

/// Returns the string's contents as a byte slice borrowed from `self`.
//
// NOTE(review): this listing is part of a diff hunk (-365,43 +403,42) with removed and
// added lines shown together. The first `ArcString` arm and the `Empty` arm look like the
// removed pre-change lines; the `Inline` arm and the second `ArcString` arm look like the
// added ones — confirm against the merged file.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
match &self.inner {
InnerString::ArcString(s) => s.as_bytes(),
// Only the first `len` bytes of the inline buffer belong to the string.
InnerString::Inline { len, data } => &data[..*len as usize],
InnerString::StaticStr(s) => s.as_bytes(),
InnerString::ArcString(s) => s.as_bytes(),
InnerString::ArcVecString(s) => s.as_ref(),
#[cfg(feature = "bytes")]
InnerString::Bytes(b) => b.as_ref(),
InnerString::Empty => &[],
}
}

/// Returns the length of the string in bytes.
//
// NOTE(review): this listing is part of a diff hunk (-365,43 +403,42) with removed and
// added lines shown together. The first `ArcString` arm and the `Empty` arm look like the
// removed pre-change lines; the `Inline` arm and the second `ArcString` arm look like the
// added ones — confirm against the merged file.
#[inline]
pub fn len(&self) -> usize {
match &self.inner {
InnerString::ArcString(s) => s.len(),
// The inline variant records its length explicitly; the buffer itself is fixed-size.
InnerString::Inline { len, .. } => *len as usize,
InnerString::StaticStr(s) => s.len(),
InnerString::ArcString(s) => s.len(),
InnerString::ArcVecString(s) => s.len(),
#[cfg(feature = "bytes")]
InnerString::Bytes(b) => b.len(),
InnerString::Empty => 0,
}
}

/// Returns `true` when the string holds zero bytes.
//
// NOTE(review): this listing is part of a diff hunk (-365,43 +403,42) with removed and
// added lines shown together. The first `ArcString` arm and the `Empty` arm look like the
// removed pre-change lines; the `Inline` arm and the second `ArcString` arm look like the
// added ones — confirm against the merged file.
#[inline]
pub fn is_empty(&self) -> bool {
match &self.inner {
InnerString::ArcString(s) => s.is_empty(),
// An inline string is empty exactly when its recorded length is zero.
InnerString::Inline { len, .. } => *len == 0,
InnerString::StaticStr(s) => s.is_empty(),
InnerString::ArcString(s) => s.is_empty(),
InnerString::ArcVecString(s) => s.is_empty(),
#[cfg(feature = "bytes")]
InnerString::Bytes(b) => b.is_empty(),
InnerString::Empty => true,
}
}
}
Expand Down Expand Up @@ -506,20 +543,35 @@ impl Borrow<str> for CheetahString {
}
}

/// Maximum number of bytes that the `Inline` variant of `InnerString` can hold.
///
/// The variant pairs a one-byte `len` with a `[u8; INLINE_CAPACITY]` buffer, so its payload
/// is 23 + 1 = 24 bytes.
///
/// NOTE(review): the enum as a whole may be larger than 24 bytes once its discriminant and
/// alignment are accounted for — confirm with `std::mem::size_of::<InnerString>()`.
const INLINE_CAPACITY: usize = 23;

/// The `InnerString` enum represents different types of string storage.
///
/// This enum uses Small String Optimization (SSO) to avoid heap allocations for short strings.
///
/// Variants:
///
/// * `ArcString(Arc<String>)` - A reference-counted string.
/// * `StaticStr(&'static str)` - A static string slice.
/// * `Inline` - Inline storage for strings <= 23 bytes (zero heap allocations).
/// * `StaticStr(&'static str)` - A static string slice (zero heap allocations).
/// * `ArcString(Arc<String>)` - A reference-counted string (one heap allocation).
/// * `ArcVecString(Arc<Vec<u8>>)` - A reference-counted byte vector.
/// * `Bytes(bytes::Bytes)` - A byte buffer (available when the "bytes" feature is enabled).
/// * `Empty` - An empty string.
//
// NOTE(review): this listing is a diff hunk (-506,20 +543,35) with removed and added lines
// shown together, which is why `ArcString` and `StaticStr` are described twice above and
// `ArcString` is declared twice below. The first `ArcString` declaration and the trailing
// `Empty` look like the removed pre-change lines — confirm against the merged file.
#[derive(Clone)]
pub(super) enum InnerString {
ArcString(Arc<String>),
/// Inline storage for short strings (up to 23 bytes).
/// Stores the length and data directly without heap allocation.
Inline {
// Number of leading bytes of `data` that are in use.
len: u8,
// Fixed-size byte storage; the constructors shown in this listing zero-fill it and
// copy the string's bytes into the front.
data: [u8; INLINE_CAPACITY],
},
/// Static string slice with 'static lifetime.
StaticStr(&'static str),
/// Reference-counted heap-allocated string.
ArcString(Arc<String>),
/// Reference-counted heap-allocated byte vector.
ArcVecString(Arc<Vec<u8>>),
/// Bytes type integration (requires "bytes" feature).
#[cfg(feature = "bytes")]
Bytes(bytes::Bytes),
Empty,
}
8 changes: 6 additions & 2 deletions src/serde.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::cheetah_string::{InnerString, EMPTY_STRING};
use crate::cheetah_string::InnerString;
use crate::CheetahString;
use serde::de::{Error, Unexpected, Visitor};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
Expand All @@ -9,12 +9,16 @@ impl Serialize for CheetahString {
S: Serializer,
{
match &self.inner {
InnerString::Inline { len, data } => {
// Safety: InnerString::Inline guarantees that data[0..len] is valid UTF-8
let s = unsafe { std::str::from_utf8_unchecked(&data[..*len as usize]) };
serializer.serialize_str(s)
}
InnerString::ArcString(s) => serializer.serialize_str(s.as_str()),
InnerString::StaticStr(s) => serializer.serialize_str(s),
InnerString::ArcVecString(s) => serializer.serialize_bytes(s),
#[cfg(feature = "bytes")]
InnerString::Bytes(bytes) => serializer.serialize_bytes(bytes.as_ref()),
InnerString::Empty => serializer.serialize_str(EMPTY_STRING),
}
}
}
Expand Down
Loading
Loading