@@ -26,53 +26,60 @@ use encoding_rs::Encoding;
2626// we are safe here, since we'll just get the first character shifted left
2727// by zeroes as repetitive 1 digits get added to the hash.
2828//
29+ // LocalNameHash is built incrementally as tags are parsed, so it must
30+ // be able to invalidate itself when it encounters an unrepresentable name.
31+ // `EMPTY_HASH` is the sentinel value that marks an invalidated hash.
32+ //
2933// Pub only for integration tests
3034#[ derive( Debug , PartialEq , Eq , Copy , Clone , Default , Hash ) ]
31- pub struct LocalNameHash ( Option < u64 > ) ;
35+ pub struct LocalNameHash ( u64 ) ;
36+
37+ const EMPTY_HASH : u64 = !0 ;
3238
3339impl LocalNameHash {
3440 #[ inline]
3541 #[ must_use]
3642 pub const fn new ( ) -> Self {
37- Self ( Some ( 0 ) )
43+ Self ( 0 )
3844 }
3945
4046 #[ inline]
4147 #[ must_use]
4248 pub const fn is_empty ( & self ) -> bool {
43- self . 0 . is_none ( )
49+ self . 0 == EMPTY_HASH
4450 }
4551
4652 #[ inline]
4753 pub fn update ( & mut self , ch : u8 ) {
48- if let Some ( h) = self . 0 {
49- // NOTE: check if we still have space for yet another
50- // character and if not then invalidate the hash.
51- // Note, that we can't have `1` (which is encoded as 0b00000) as
52- // a first character of a tag name, so it's safe to perform
53- // check this way.
54- self . 0 = if h >> ( 64 - 5 ) == 0 {
55- match ch {
56- // NOTE: apply 0x1F mask on ASCII alpha to convert it to the
57- // number from 1 to 26 (character case is controlled by one of
58- // upper bits which we eliminate with the mask). Then add
59- // 5, since numbers from 0 to 5 are reserved for digits.
60- // Aftwerards put result as 5 lower bits of the hash.
61- b'a' ..=b'z' | b'A' ..=b'Z' => Some ( ( h << 5 ) | ( ( u64:: from ( ch) & 0x1F ) + 5 ) ) ,
62-
63- // NOTE: apply 0x0F mask on ASCII digit to convert it to number
64- // from 1 to 6. Then subtract 1 to make it zero-based.
65- // Afterwards, put result as lower bits of the hash.
66- b'1' ..=b'6' => Some ( ( h << 5 ) | ( ( u64:: from ( ch) & 0x0F ) - 1 ) ) ,
67-
68- // NOTE: for any other characters hash function is not
69- // applicable, so we completely invalidate the hash.
70- _ => None ,
71- }
72- } else {
73- None
74- } ;
75- }
54+ let h = self . 0 ;
55+
56+ // NOTE: check if we still have space for yet another
57+ // character and, if not, invalidate the hash.
58+ // Note that we can't have `1` (which is encoded as 0b00000) as
59+ // the first character of a tag name, so it's safe to perform
60+ // the check this way.
61+ // EMPTY_HASH has all bits set, so it will fail this check.
62+ self . 0 = if h >> ( 64 - 5 ) == 0 {
63+ match ch {
64+ // NOTE: apply 0x1F mask on ASCII alpha to convert it to the
65+ // number from 1 to 26 (character case is controlled by one of
66+ // the upper bits, which we eliminate with the mask). Then add
67+ // 5, since numbers from 0 to 5 are reserved for digits.
68+ // Afterwards, put the result as the 5 lower bits of the hash.
69+ b'a' ..=b'z' | b'A' ..=b'Z' => ( h << 5 ) | ( ( u64:: from ( ch) & 0x1F ) + 5 ) ,
70+
71+ // NOTE: apply 0x0F mask on ASCII digit to convert it to a number
72+ // from 1 to 6. Then subtract 1 to make it zero-based.
73+ // Afterwards, put the result as the lower bits of the hash.
74+ b'1' ..=b'6' => ( h << 5 ) | ( ( u64:: from ( ch) & 0x0F ) - 1 ) ,
75+
76+ // NOTE: for any other characters hash function is not
77+ // applicable, so we completely invalidate the hash.
78+ _ => EMPTY_HASH ,
79+ }
80+ } else {
81+ EMPTY_HASH
82+ } ;
7683 }
7784}
7885
@@ -92,10 +99,7 @@ impl From<&str> for LocalNameHash {
9299impl PartialEq < Tag > for LocalNameHash {
93100 #[ inline]
94101 fn eq ( & self , tag : & Tag ) -> bool {
95- match self . 0 {
96- Some ( h) => * tag as u64 == h,
97- None => false ,
98- }
102+ self . 0 == * tag as u64
99103 }
100104}
101105
@@ -159,7 +163,10 @@ impl PartialEq<LocalName<'_>> for LocalName<'_> {
159163 use LocalName :: { Bytes , Hash } ;
160164
161165 match ( self , other) {
162- ( Hash ( s) , Hash ( o) ) => s == o,
166+ ( Hash ( s) , Hash ( o) ) => {
167+ debug_assert ! ( !s. is_empty( ) ) ;
168+ s == o
169+ }
163170 ( Bytes ( s) , Bytes ( o) ) => s. eq_ignore_ascii_case ( o) ,
164171 _ => false ,
165172 }
@@ -172,7 +179,7 @@ mod tests {
172179
173180 #[ test]
174181 fn from_str ( ) {
175- assert_eq ! ( LocalNameHash :: from( "div" ) , LocalNameHash ( Some ( 9691u64 ) ) ) ;
182+ assert_eq ! ( LocalNameHash :: from( "div" ) , LocalNameHash ( 9691u64 ) ) ;
176183 }
177184
178185 #[ test]
0 commit comments