@@ -989,6 +989,9 @@ standard_headers! {
989989 ( XXssProtection , X_XSS_PROTECTION , b"x-xss-protection" ) ;
990990}
991991
/// Number of bytes in a `usize` on the target platform.
///
/// The word-at-a-time header-name loops process input in chunks of this many bytes.
const WORD_SIZE: usize = core::mem::size_of::<usize>();
994+
992995/// Valid header name characters
993996///
994997/// ```not_rust
@@ -1070,6 +1073,59 @@ const HEADER_CHARS_H2: [u8; 256] = [
10701073 0 , 0 , 0 , 0 , 0 , 0 // 25x
10711074] ;
10721075
/// A helper structure for performing word-sized operations on bytes.
///
/// `WordRegister` wraps a `usize` so that a full machine word of bytes can be
/// assembled one byte at a time and then inspected or emitted in a single
/// operation. This is used to optimize scanning loops where handling 4 or 8
/// bytes at once is cheaper than handling them individually.
///
/// Bytes are addressed in big-endian order: index `0` is the most significant
/// byte of the wrapped `usize`, so `value().to_be_bytes()` yields the bytes in
/// the order they were written.
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct WordRegister {
    val: usize,
}

impl WordRegister {
    /// Number of bytes held by the register.
    const BYTES: usize = core::mem::size_of::<usize>();

    /// A word with every byte equal to `0x01`.
    const ONES: usize = usize::MAX / 0xFF;

    /// A word with every byte equal to `0x80`.
    const HIGHS: usize = Self::ONES << 7;

    /// Creates a new register with every byte set to zero.
    #[inline]
    pub const fn new() -> Self {
        Self { val: 0 }
    }

    /// Returns `true` if any byte in the register is zero.
    ///
    /// Uses the classic branch-free "has zero byte" bit trick (attributed to
    /// Alan Mycroft). Subtracting `0x01` from every byte sets a byte's high bit
    /// only if that byte was `0x00` or greater than `0x80`; masking with `!val`
    /// discards the bytes whose high bit was already set. A borrow out of a
    /// zero byte may also flag the byte above it, but that can only happen when
    /// a genuine zero byte exists, so the boolean result is exact.
    #[inline]
    pub fn contains_zero(&self) -> bool {
        let val = self.val;
        (val.wrapping_sub(Self::ONES) & !val & Self::HIGHS) != 0
    }

    /// Writes a single byte at the given byte index, leaving the others untouched.
    ///
    /// # Arguments
    ///
    /// * `idx` - The byte position within the word (0-indexed), where `0` is
    ///   the most significant byte. Must be less than `size_of::<usize>()`.
    /// * `byte` - The `u8` value to write.
    ///
    /// # Panics
    ///
    /// Panics in debug builds if `idx` is out of range. Release builds do not
    /// check the index; callers must uphold the bound themselves.
    #[inline]
    pub fn set(&mut self, idx: usize, byte: u8) {
        debug_assert!(
            idx < Self::BYTES,
            "byte index {} out of range for a {}-byte word",
            idx,
            Self::BYTES
        );
        // Index 0 maps to the highest-order byte, hence the reversed shift.
        let shift = (Self::BYTES - 1 - idx) * 8;
        let mask = !(0xFFusize << shift);
        self.val = (self.val & mask) | ((byte as usize) << shift);
    }

    /// Returns the raw `usize` value contained in the register.
    #[inline]
    pub const fn value(&self) -> usize {
        self.val
    }
}
1128+
10731129fn parse_hdr < ' a > (
10741130 data : & ' a [ u8 ] ,
10751131 b : & ' a mut [ MaybeUninit < u8 > ; SCRATCH_BUF_SIZE ] ,
@@ -1079,20 +1135,49 @@ fn parse_hdr<'a>(
10791135 0 => Err ( InvalidHeaderName :: new ( ) ) ,
10801136 len @ 1 ..=SCRATCH_BUF_SIZE => {
10811137 // Read from data into the buffer - transforming using `table` as we go
1082- data. iter ( )
1083- . zip ( b. iter_mut ( ) )
1084- . for_each ( |( index, out) | * out = MaybeUninit :: new ( table[ * index as usize ] ) ) ;
1138+ let mut i = 0 ;
1139+
1140+ if WORD_SIZE >= 4 {
1141+ let mut register = WordRegister :: new ( ) ;
1142+ while i + WORD_SIZE <= len {
1143+ let chunk = & data[ i..i + WORD_SIZE ] ;
1144+
1145+ for ( j, b) in chunk. iter ( ) . enumerate ( ) {
1146+ let b = table[ * b as usize ] ;
1147+ register. set ( j, b) ;
1148+ }
1149+
1150+ if register. contains_zero ( ) {
1151+ // A zero byte means the chunk contains an invalid header-name character: reject the name.
1152+ return Err ( InvalidHeaderName :: new ( ) ) ;
1153+ }
1154+
1155+ // SAFETY: the loop condition guarantees `i + WORD_SIZE <= len`, and `len` is at most
1156+ // `SCRATCH_BUF_SIZE`, so the `WORD_SIZE` bytes starting at `b[i]` are in bounds.
1157+ // `MaybeUninit<u8>` has the same layout as `u8`, and `write_unaligned` has no alignment requirement.
1158+ unsafe {
1159+ let ptr = b. as_mut_ptr ( ) . add ( i) as * mut [ u8 ; WORD_SIZE ] ;
1160+ std:: ptr:: write_unaligned ( ptr, register. value ( ) . to_be_bytes ( ) ) ;
1161+ }
1162+
1163+ i += WORD_SIZE ;
1164+ }
1165+ }
1166+
1167+ // Process the remainder bytes
1168+ while i < len {
1169+ let v = table[ data[ i] as usize ] ;
1170+ if v == 0 {
1171+ return Err ( InvalidHeaderName :: new ( ) ) ;
1172+ }
1173+ b[ i] = MaybeUninit :: new ( v) ;
1174+ i += 1 ;
1175+ }
10851176 // Safety: len bytes of b were just initialized.
10861177 let name: & ' a [ u8 ] = unsafe { slice_assume_init ( & b[ 0 ..len] ) } ;
10871178 match StandardHeader :: from_bytes ( name) {
10881179 Some ( sh) => Ok ( sh. into ( ) ) ,
1089- None => {
1090- if name. contains ( & 0 ) {
1091- Err ( InvalidHeaderName :: new ( ) )
1092- } else {
1093- Ok ( HdrName :: custom ( name, true ) )
1094- }
1095- }
1180+ None => Ok ( HdrName :: custom ( name, true ) ) ,
10961181 }
10971182 }
10981183 SCRATCH_BUF_OVERFLOW ..=super :: MAX_HEADER_NAME_LEN => Ok ( HdrName :: custom ( data, false ) ) ,
@@ -1123,10 +1208,41 @@ impl HeaderName {
11231208 let val = unsafe { ByteStr :: from_utf8_unchecked ( buf) } ;
11241209 Ok ( Custom ( val) . into ( ) )
11251210 }
1126- Repr :: Custom ( MaybeLower { buf, lower : false } ) => {
1211+ Repr :: Custom ( MaybeLower {
1212+ mut buf,
1213+ lower : false ,
1214+ } ) => {
11271215 use bytes:: BufMut ;
11281216 let mut dst = BytesMut :: with_capacity ( buf. len ( ) ) ;
11291217
1218+ if WORD_SIZE >= 4 {
1219+ let mut register = WordRegister :: new ( ) ;
1220+ while buf. len ( ) >= WORD_SIZE {
1221+ let chunk = & buf[ ..WORD_SIZE ] ;
1222+
1223+ for ( i, b) in chunk. iter ( ) . enumerate ( ) {
1224+ // HEADER_CHARS maps all bytes to valid single-byte UTF-8
1225+ let b = HEADER_CHARS [ * b as usize ] ;
1226+ register. set ( i, b) ;
1227+ }
1228+
1229+ if register. contains_zero ( ) {
1230+ return Err ( InvalidHeaderName :: new ( ) ) ;
1231+ }
1232+
1233+ #[ cfg( target_pointer_width = "64" ) ]
1234+ {
1235+ dst. put_u64 ( register. value ( ) as u64 ) ;
1236+ }
1237+ #[ cfg( target_pointer_width = "32" ) ]
1238+ {
1239+ dst. put_u32 ( register. value ( ) as u32 ) ;
1240+ }
1241+
1242+ buf = & buf[ WORD_SIZE ..] ;
1243+ }
1244+ }
1245+ // Process the remaining bytes
11301246 for b in buf. iter ( ) {
11311247 // HEADER_CHARS maps all bytes to valid single-byte UTF-8
11321248 let b = HEADER_CHARS [ * b as usize ] ;
@@ -1178,7 +1294,27 @@ impl HeaderName {
11781294 Ok ( Custom ( val) . into ( ) )
11791295 }
11801296 Repr :: Custom ( MaybeLower { buf, lower : false } ) => {
1181- for & b in buf. iter ( ) {
1297+ let mut check_buf = buf;
1298+
1299+ if WORD_SIZE >= 4 {
1300+ let mut register = WordRegister :: new ( ) ;
1301+ while check_buf. len ( ) >= WORD_SIZE {
1302+ let chunk = & check_buf[ ..WORD_SIZE ] ;
1303+
1304+ for ( i, b) in chunk. iter ( ) . enumerate ( ) {
1305+ let b = HEADER_CHARS_H2 [ * b as usize ] ;
1306+ register. set ( i, b) ;
1307+ }
1308+
1309+ if register. contains_zero ( ) {
1310+ return Err ( InvalidHeaderName :: new ( ) ) ;
1311+ }
1312+
1313+ check_buf = & check_buf[ WORD_SIZE ..] ;
1314+ }
1315+ }
1316+
1317+ for & b in check_buf. iter ( ) {
11821318 // HEADER_CHARS_H2 maps all bytes that are not valid single-byte
11831319 // UTF-8 to 0 so this check returns an error for invalid UTF-8.
11841320 if HEADER_CHARS_H2 [ b as usize ] == 0 {
@@ -1543,8 +1679,33 @@ impl<'a> From<HdrName<'a>> for HeaderName {
15431679 } else {
15441680 use bytes:: BufMut ;
15451681 let mut dst = BytesMut :: with_capacity ( maybe_lower. buf . len ( ) ) ;
1682+ let mut buf = maybe_lower. buf ;
1683+
1684+ if WORD_SIZE >= 4 {
1685+ let mut register = WordRegister :: new ( ) ;
1686+ while buf. len ( ) >= WORD_SIZE {
1687+ let chunk = & maybe_lower. buf [ ..WORD_SIZE ] ;
1688+
1689+ for ( i, b) in chunk. iter ( ) . enumerate ( ) {
1690+ // HEADER_CHARS maps all bytes to valid single-byte UTF-8
1691+ let b = HEADER_CHARS [ * b as usize ] ;
1692+ register. set ( i, b) ;
1693+ }
1694+
1695+ #[ cfg( target_pointer_width = "64" ) ]
1696+ {
1697+ dst. put_u64 ( register. value ( ) as u64 ) ;
1698+ }
1699+ #[ cfg( target_pointer_width = "32" ) ]
1700+ {
1701+ dst. put_u32 ( register. value ( ) as u32 ) ;
1702+ }
1703+
1704+ buf = & buf[ WORD_SIZE ..] ;
1705+ }
1706+ }
15461707
1547- for b in maybe_lower . buf . iter ( ) {
1708+ for b in buf. iter ( ) {
15481709 // HEADER_CHARS maps each byte to a valid single-byte UTF-8
15491710 // codepoint.
15501711 dst. put_u8 ( HEADER_CHARS [ * b as usize ] ) ;
0 commit comments