Skip to content

Commit 99c4e23

Browse files
committed
Make LocalNameHash smaller
1 parent caea7f9 commit 99c4e23

File tree

1 file changed

+44
-37
lines changed

1 file changed

+44
-37
lines changed

src/html/local_name.rs

Lines changed: 44 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -26,53 +26,60 @@ use encoding_rs::Encoding;
2626
// we are safe here, since we'll just get first character shifted left
2727
// by zeroes as repetitive 1 digits get added to the hash.
2828
//
29+
// LocalNameHash is built incrementally as tags are parsed, so it needs
30+
// to be able to invalidate itself if parsing an unrepresentable name.
31+
// `EMPTY_HASH` is used as a sentinel value.
32+
//
2933
// Pub only for integration tests
3034
#[derive(Debug, PartialEq, Eq, Copy, Clone, Default, Hash)]
31-
pub struct LocalNameHash(Option<u64>);
35+
pub struct LocalNameHash(u64);
36+
37+
const EMPTY_HASH: u64 = !0;
3238

3339
impl LocalNameHash {
3440
#[inline]
3541
#[must_use]
3642
pub const fn new() -> Self {
37-
Self(Some(0))
43+
Self(0)
3844
}
3945

4046
#[inline]
4147
#[must_use]
4248
pub const fn is_empty(&self) -> bool {
43-
self.0.is_none()
49+
self.0 == EMPTY_HASH
4450
}
4551

4652
#[inline]
4753
pub fn update(&mut self, ch: u8) {
48-
if let Some(h) = self.0 {
49-
// NOTE: check if we still have space for yet another
50-
// character and if not then invalidate the hash.
51-
// Note, that we can't have `1` (which is encoded as 0b00000) as
52-
// a first character of a tag name, so it's safe to perform
53-
// check this way.
54-
self.0 = if h >> (64 - 5) == 0 {
55-
match ch {
56-
// NOTE: apply 0x1F mask on ASCII alpha to convert it to the
57-
// number from 1 to 26 (character case is controlled by one of
58-
// upper bits which we eliminate with the mask). Then add
59-
// 5, since numbers from 0 to 5 are reserved for digits.
60-
// Afterwards put result as 5 lower bits of the hash.
61-
b'a'..=b'z' | b'A'..=b'Z' => Some((h << 5) | ((u64::from(ch) & 0x1F) + 5)),
62-
63-
// NOTE: apply 0x0F mask on ASCII digit to convert it to number
64-
// from 1 to 6. Then subtract 1 to make it zero-based.
65-
// Afterwards, put result as lower bits of the hash.
66-
b'1'..=b'6' => Some((h << 5) | ((u64::from(ch) & 0x0F) - 1)),
67-
68-
// NOTE: for any other characters hash function is not
69-
// applicable, so we completely invalidate the hash.
70-
_ => None,
71-
}
72-
} else {
73-
None
74-
};
75-
}
54+
let h = self.0;
55+
56+
// NOTE: check if we still have space for yet another
57+
// character and if not then invalidate the hash.
58+
// Note that we can't have `1` (which is encoded as 0b00000) as
59+
// a first character of a tag name, so it's safe to perform
60+
// check this way.
61+
// EMPTY_HASH has all bits set, so it will fail this check.
62+
self.0 = if h >> (64 - 5) == 0 {
63+
match ch {
64+
// NOTE: apply 0x1F mask on ASCII alpha to convert it to the
65+
// number from 1 to 26 (character case is controlled by one of
66+
// upper bits which we eliminate with the mask). Then add
67+
// 5, since numbers from 0 to 5 are reserved for digits.
68+
// Afterwards put result as 5 lower bits of the hash.
69+
b'a'..=b'z' | b'A'..=b'Z' => (h << 5) | ((u64::from(ch) & 0x1F) + 5),
70+
71+
// NOTE: apply 0x0F mask on ASCII digit to convert it to number
72+
// from 1 to 6. Then subtract 1 to make it zero-based.
73+
// Afterwards, put result as lower bits of the hash.
74+
b'1'..=b'6' => (h << 5) | ((u64::from(ch) & 0x0F) - 1),
75+
76+
// NOTE: for any other characters hash function is not
77+
// applicable, so we completely invalidate the hash.
78+
_ => EMPTY_HASH,
79+
}
80+
} else {
81+
EMPTY_HASH
82+
};
7683
}
7784
}
7885

@@ -92,10 +99,7 @@ impl From<&str> for LocalNameHash {
9299
impl PartialEq<Tag> for LocalNameHash {
93100
#[inline]
94101
fn eq(&self, tag: &Tag) -> bool {
95-
match self.0 {
96-
Some(h) => *tag as u64 == h,
97-
None => false,
98-
}
102+
self.0 == *tag as u64
99103
}
100104
}
101105

@@ -159,7 +163,10 @@ impl PartialEq<LocalName<'_>> for LocalName<'_> {
159163
use LocalName::{Bytes, Hash};
160164

161165
match (self, other) {
162-
(Hash(s), Hash(o)) => s == o,
166+
(Hash(s), Hash(o)) => {
167+
debug_assert!(!s.is_empty());
168+
s == o
169+
}
163170
(Bytes(s), Bytes(o)) => s.eq_ignore_ascii_case(o),
164171
_ => false,
165172
}
@@ -172,7 +179,7 @@ mod tests {
172179

173180
#[test]
174181
fn from_str() {
175-
assert_eq!(LocalNameHash::from("div"), LocalNameHash(Some(9691u64)));
182+
assert_eq!(LocalNameHash::from("div"), LocalNameHash(9691u64));
176183
}
177184

178185
#[test]

0 commit comments

Comments
 (0)