Skip to content

Commit 6e35465

Browse files
committed
Shrink Node struct to reduce binary size
Signed-off-by: Simon Wülker <[email protected]>
1 parent 6b0076d commit 6e35465

File tree

3 files changed

+55
-72
lines changed

3 files changed

+55
-72
lines changed

html5ever/build/main.rs

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -238,14 +238,6 @@ impl DafsaBuilder {
238238
/// Computes all numbers needed for minimal perfect hashing
239239
fn compute_numbers(&mut self) {
240240
self.compute_numbers_for(0);
241-
// // Compute numbers for all nodes except the root node
242-
// for edge in self.nodes[0].edges {
243-
// let Some(edge) = edge else {
244-
// continue;
245-
// };
246-
247-
// self.compute_numbers_for(edge);
248-
// }
249241
}
250242

251243
/// Returns the perfect hash value for the input, or `None` if
@@ -343,18 +335,8 @@ fn main() {
343335
)
344336
.unwrap();
345337

346-
// Define the root node
347-
write!(
348-
&mut result,
349-
"Node {{
350-
code_point: 0,
351-
first_child_index: 1,
352-
is_last_child: true,
353-
is_terminal: false,
354-
num_nodes: 0
355-
}},"
356-
)
357-
.unwrap();
338+
// Define all nodes by traversing the DAFSA graph
339+
write!(&mut result, "Node::new(0, 0, false, true, 1),").unwrap();
358340
while let Some(handle) = stack.pop_front() {
359341
let node = &dafsa_builder.nodes[handle];
360342
let num_children = node.edges().count();
@@ -389,13 +371,7 @@ fn main() {
389371

390372
write!(
391373
&mut result,
392-
"Node {{
393-
code_point: {code_point},
394-
first_child_index: {first_child_index},
395-
is_last_child: {is_last_child},
396-
is_terminal: {is_terminal},
397-
num_nodes: {hash_value}
398-
}},",
374+
"Node::new({code_point}, {hash_value}, {is_terminal}, {is_last_child}, {first_child_index}),",
399375
is_terminal = child.is_terminal,
400376
hash_value = child.num_nodes
401377
)

html5ever/src/tokenizer/char_ref/codegen.rs

Lines changed: 51 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,62 @@ use crate::tokenizer::CharRef;
1111

1212
include!(concat!(env!("OUT_DIR"), "/named_entities_graph.rs"));
1313

14+
/// A single node in the DAFSA.
15+
///
16+
/// For memory efficiency reasons, this is packed in 32 bits. The memory representation is as follows:
17+
/// * 8 bits: code point
18+
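/// * 8 bits: hash value
/// * 1 bit: "is terminal" flag (bit 15)
/// * 1 bit: "is last child" flag (bit 14)
/// * 2 bits: unused (bits 13 and 12)
/// * 12 bits: index of the first child (bits 11 to 0); zero means the node has no children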
1419
#[derive(Clone, Copy, Debug)]
15-
pub(crate) struct Node {
16-
first_child_index: usize,
17-
code_point: u8,
18-
is_last_child: bool,
19-
is_terminal: bool,
20-
num_nodes: u8,
21-
}
20+
pub(crate) struct Node(u32);
2221

2322
impl Node {
23+
const IS_TERMINAL: u32 = 1 << 15;
24+
const IS_LAST_CHILD: u32 = 1 << 14;
25+
26+
pub(crate) const fn new(
27+
code_point: u8,
28+
hash_value: u8,
29+
is_terminal: bool,
30+
is_last_child: bool,
31+
first_child_index: u16,
32+
) -> Self {
33+
let mut value = 0;
34+
value |= (code_point as u32) << 24;
35+
value |= (hash_value as u32) << 16;
36+
37+
if is_terminal {
38+
value |= Self::IS_TERMINAL;
39+
}
40+
41+
if is_last_child {
42+
value |= Self::IS_LAST_CHILD;
43+
}
44+
45+
assert!(first_child_index <= 0xFFF);
46+
47+
value |= first_child_index as u32;
48+
49+
Self(value)
50+
}
51+
2452
pub(crate) const fn code_point(&self) -> u8 {
25-
self.code_point
53+
(self.0 >> 24) as u8
2654
}
2755

28-
pub(crate) const fn num_nodes(&self) -> usize {
29-
self.num_nodes as usize
56+
pub(crate) const fn hash_value(&self) -> usize {
57+
((self.0 >> 16) & 0xFF) as usize
3058
}
3159

3260
pub(crate) const fn is_terminal(&self) -> bool {
33-
self.is_terminal
61+
(self.0 & Self::IS_TERMINAL) != 0
62+
}
63+
64+
const fn is_last_child(&self) -> bool {
65+
(self.0 & Self::IS_LAST_CHILD) != 0
66+
}
67+
68+
const fn first_child_index(&self) -> u16 {
69+
(self.0 & 0xFFF) as u16
3470
}
3571

3672
pub(crate) fn children(&self) -> impl Iterator<Item = &'static Node> {
@@ -49,51 +85,22 @@ impl Node {
4985
let node = &DAFSA_NODES[self.index];
5086
self.index += 1;
5187

52-
if node.is_last_child {
88+
if node.is_last_child() {
5389
self.done = true;
5490
}
5591

5692
Some(node)
5793
}
5894
}
5995

96+
let first_child_index = self.first_child_index();
6097
ChildIterator {
61-
index: self.first_child_index,
62-
done: self.first_child_index == 0,
98+
index: first_child_index as usize,
99+
done: first_child_index == 0,
63100
}
64101
}
65102
}
66103

67-
// fn compute_unique_index(input: &str) -> Option<usize> {
68-
// debug_assert!(input.is_ascii());
69-
70-
// let mut index = 0;
71-
// let mut current = &DAFSA_NODES[0];
72-
// for code_point in input.as_bytes() {
73-
// let mut next_node = None;
74-
// for child in current.children() {
75-
// if child.code_point == *code_point {
76-
// next_node = Some(child);
77-
// break;
78-
// } else {
79-
// index += child.num_nodes as usize;
80-
// }
81-
// }
82-
83-
// current = next_node?;
84-
85-
// if current.is_terminal {
86-
// index += 1;
87-
// }
88-
// }
89-
90-
// if current.is_terminal {
91-
// Some(index)
92-
// } else {
93-
// None
94-
// }
95-
// }
96-
97104
pub(crate) fn resolve_unique_hash_value(value: usize) -> CharRef {
98105
let (first, second) = REFERENCES[value];
99106

html5ever/src/tokenizer/char_ref/named.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ impl NamedReferenceTokenizerState {
8080
next_node = Some(child);
8181
break;
8282
} else {
83-
self.hash_value += child.num_nodes() as usize;
83+
self.hash_value += child.hash_value() as usize;
8484
}
8585
}
8686

0 commit comments

Comments
 (0)