Skip to content

Commit 70a0157

Browse files
committed
perf: add WordRegister for efficient word-sized byte operations
1 parent ec11180 commit 70a0157

File tree

1 file changed

+174
-13
lines changed

1 file changed

+174
-13
lines changed

src/header/name.rs

Lines changed: 174 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,9 @@ standard_headers! {
989989
(XXssProtection, X_XSS_PROTECTION, b"x-xss-protection");
990990
}
991991

992+
/// The size of a machine word in bytes.
993+
const WORD_SIZE: usize = std::mem::size_of::<usize>();
994+
992995
/// Valid header name characters
993996
///
994997
/// ```not_rust
@@ -1070,6 +1073,59 @@ const HEADER_CHARS_H2: [u8; 256] = [
10701073
0, 0, 0, 0, 0, 0 // 25x
10711074
];
10721075

1076+
/// A helper structure for performing word-sized operations on bytes.
///
/// `WordRegister` wraps a `usize` so that individual bytes can be staged one at a
/// time and then inspected or stored as a single machine word. This is used to
/// optimise scanning and parsing loops where handling 4 or 8 bytes at once is
/// cheaper than handling them individually.
///
/// Byte index `0` is the most significant byte of the word, so
/// `value().to_be_bytes()` yields the bytes in the order they were set.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub(crate) struct WordRegister {
    val: usize,
}

impl WordRegister {
    /// Creates a new register with every byte set to zero.
    #[inline]
    pub const fn new() -> Self {
        Self { val: 0 }
    }

    /// Returns `true` if any byte in the register is zero (null byte).
    ///
    /// Uses the classic branch-free "has zero byte" bit trick (attributed to
    /// Alan Mycroft): subtracting `0x01` from every byte borrows into the high
    /// bit only for bytes that were `0x00` (or sit above one), and masking with
    /// `!val` discards bytes whose high bit was already set. The result is
    /// non-zero exactly when at least one byte of the word is zero.
    #[inline]
    pub fn contains_zero(&self) -> bool {
        // 0x0101…01 and 0x8080…80 for the native word width.
        const ONES: usize = usize::MAX / 0xFF;
        const HIGHS: usize = ONES << 7;
        (self.val.wrapping_sub(ONES) & !self.val & HIGHS) != 0
    }

    /// Writes a single byte at the given byte index.
    ///
    /// The index `idx` is the byte position within the `usize`, where `0` is the
    /// most significant byte. The other bytes of the register are left untouched.
    ///
    /// # Arguments
    ///
    /// * `idx` - The byte index to write to (0-indexed).
    /// * `byte` - The `u8` value to write.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= size_of::<usize>()`.
    #[inline]
    pub fn set(&mut self, idx: usize, byte: u8) {
        // Going through the big-endian byte array keeps "index 0 is the most
        // significant byte" explicit and turns an out-of-range index into a
        // plain bounds-check panic instead of a wrapped shift amount.
        let mut bytes = self.val.to_be_bytes();
        bytes[idx] = byte;
        self.val = usize::from_be_bytes(bytes);
    }

    /// Returns the raw `usize` value contained in the register.
    #[inline]
    pub const fn value(&self) -> usize {
        self.val
    }
}
1128+
10731129
fn parse_hdr<'a>(
10741130
data: &'a [u8],
10751131
b: &'a mut [MaybeUninit<u8>; SCRATCH_BUF_SIZE],
@@ -1079,20 +1135,49 @@ fn parse_hdr<'a>(
10791135
0 => Err(InvalidHeaderName::new()),
10801136
len @ 1..=SCRATCH_BUF_SIZE => {
10811137
// Read from data into the buffer - transforming using `table` as we go
1082-
data.iter()
1083-
.zip(b.iter_mut())
1084-
.for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize]));
1138+
let mut i = 0;
1139+
1140+
if WORD_SIZE >= 4 {
1141+
let mut register = WordRegister::new();
1142+
while i + WORD_SIZE <= len {
1143+
let chunk = &data[i..i + WORD_SIZE];
1144+
1145+
for (j, b) in chunk.iter().enumerate() {
1146+
let b = table[*b as usize];
1147+
register.set(j, b);
1148+
}
1149+
1150+
if register.contains_zero() {
1151+
// A byte that the table maps to zero is not a valid header-name character: reject the name.
1152+
return Err(InvalidHeaderName::new());
1153+
}
1154+
1155+
// SAFETY: `i + WORD_SIZE <= len <= SCRATCH_BUF_SIZE`, so the `WORD_SIZE` bytes starting at
1156+
// `b[i]` lie inside the scratch buffer. `MaybeUninit<u8>` has the same layout as `u8`, and
1157+
// `write_unaligned` imposes no alignment requirement on the destination pointer.
1158+
unsafe {
1159+
let ptr = b.as_mut_ptr().add(i) as *mut [u8; WORD_SIZE];
1160+
std::ptr::write_unaligned(ptr, register.value().to_be_bytes());
1161+
}
1162+
1163+
i += WORD_SIZE;
1164+
}
1165+
}
1166+
1167+
// Process the remainder bytes
1168+
while i < len {
1169+
let v = table[data[i] as usize];
1170+
if v == 0 {
1171+
return Err(InvalidHeaderName::new());
1172+
}
1173+
b[i] = MaybeUninit::new(v);
1174+
i += 1;
1175+
}
10851176
// Safety: len bytes of b were just initialized.
10861177
let name: &'a [u8] = unsafe { slice_assume_init(&b[0..len]) };
10871178
match StandardHeader::from_bytes(name) {
10881179
Some(sh) => Ok(sh.into()),
1089-
None => {
1090-
if name.contains(&0) {
1091-
Err(InvalidHeaderName::new())
1092-
} else {
1093-
Ok(HdrName::custom(name, true))
1094-
}
1095-
}
1180+
None => Ok(HdrName::custom(name, true)),
10961181
}
10971182
}
10981183
SCRATCH_BUF_OVERFLOW..=super::MAX_HEADER_NAME_LEN => Ok(HdrName::custom(data, false)),
@@ -1123,10 +1208,41 @@ impl HeaderName {
11231208
let val = unsafe { ByteStr::from_utf8_unchecked(buf) };
11241209
Ok(Custom(val).into())
11251210
}
1126-
Repr::Custom(MaybeLower { buf, lower: false }) => {
1211+
Repr::Custom(MaybeLower {
1212+
mut buf,
1213+
lower: false,
1214+
}) => {
11271215
use bytes::BufMut;
11281216
let mut dst = BytesMut::with_capacity(buf.len());
11291217

1218+
if WORD_SIZE >= 4 {
1219+
let mut register = WordRegister::new();
1220+
while buf.len() >= WORD_SIZE {
1221+
let chunk = &buf[..WORD_SIZE];
1222+
1223+
for (i, b) in chunk.iter().enumerate() {
1224+
// HEADER_CHARS maps all bytes to valid single-byte UTF-8
1225+
let b = HEADER_CHARS[*b as usize];
1226+
register.set(i, b);
1227+
}
1228+
1229+
if register.contains_zero() {
1230+
return Err(InvalidHeaderName::new());
1231+
}
1232+
1233+
#[cfg(target_pointer_width = "64")]
1234+
{
1235+
dst.put_u64(register.value() as u64);
1236+
}
1237+
#[cfg(target_pointer_width = "32")]
1238+
{
1239+
dst.put_u32(register.value() as u32);
1240+
}
1241+
1242+
buf = &buf[WORD_SIZE..];
1243+
}
1244+
}
1245+
// Process the remainder bytes
11301246
for b in buf.iter() {
11311247
// HEADER_CHARS maps all bytes to valid single-byte UTF-8
11321248
let b = HEADER_CHARS[*b as usize];
@@ -1178,7 +1294,27 @@ impl HeaderName {
11781294
Ok(Custom(val).into())
11791295
}
11801296
Repr::Custom(MaybeLower { buf, lower: false }) => {
1181-
for &b in buf.iter() {
1297+
let mut check_buf = buf;
1298+
1299+
if WORD_SIZE >= 4 {
1300+
let mut register = WordRegister::new();
1301+
while check_buf.len() >= WORD_SIZE {
1302+
let chunk = &check_buf[..WORD_SIZE];
1303+
1304+
for (i, b) in chunk.iter().enumerate() {
1305+
let b = HEADER_CHARS_H2[*b as usize];
1306+
register.set(i, b);
1307+
}
1308+
1309+
if register.contains_zero() {
1310+
return Err(InvalidHeaderName::new());
1311+
}
1312+
1313+
check_buf = &check_buf[WORD_SIZE..];
1314+
}
1315+
}
1316+
1317+
for &b in check_buf.iter() {
11821318
// HEADER_CHARS_H2 maps all bytes that are not valid single-byte
11831319
// UTF-8 to 0 so this check returns an error for invalid UTF-8.
11841320
if HEADER_CHARS_H2[b as usize] == 0 {
@@ -1543,8 +1679,33 @@ impl<'a> From<HdrName<'a>> for HeaderName {
15431679
} else {
15441680
use bytes::BufMut;
15451681
let mut dst = BytesMut::with_capacity(maybe_lower.buf.len());
1682+
let mut buf = maybe_lower.buf;
1683+
1684+
if WORD_SIZE >= 4 {
1685+
let mut register = WordRegister::new();
1686+
while buf.len() >= WORD_SIZE {
1687+
// Take the next word from the advancing cursor `buf`; slicing `maybe_lower.buf`
// here would re-read the first word of the name on every iteration.
let chunk = &buf[..WORD_SIZE];
1688+
1689+
for (i, b) in chunk.iter().enumerate() {
1690+
// HEADER_CHARS maps all bytes to valid single-byte UTF-8
1691+
let b = HEADER_CHARS[*b as usize];
1692+
register.set(i, b);
1693+
}
1694+
1695+
#[cfg(target_pointer_width = "64")]
1696+
{
1697+
dst.put_u64(register.value() as u64);
1698+
}
1699+
#[cfg(target_pointer_width = "32")]
1700+
{
1701+
dst.put_u32(register.value() as u32);
1702+
}
1703+
1704+
buf = &buf[WORD_SIZE..];
1705+
}
1706+
}
15461707

1547-
for b in maybe_lower.buf.iter() {
1708+
for b in buf.iter() {
15481709
// HEADER_CHARS maps each byte to a valid single-byte UTF-8
15491710
// codepoint.
15501711
dst.put_u8(HEADER_CHARS[*b as usize]);

0 commit comments

Comments
 (0)