Skip to content

Commit 73e39a2

Browse files
author
bors-servo
authored
Auto merge of #288 - servo:arena, r=Ygg01
Add an example with an arena-allocated tree. This change can be reviewed on Reviewable: https://reviewable.io/reviews/servo/html5ever/288
2 parents 7278a9e + ad12d5c commit 73e39a2

File tree

2 files changed

+302
-0
lines changed

2 files changed

+302
-0
lines changed

html5ever/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ markup5ever = { version = "0.5", path = "../markup5ever" }
4141
[dev-dependencies]
4242
rustc-serialize = "0.3.15"
4343
rustc-test = "0.2"
44+
typed-arena = "1.3.0"
4445

4546
[build-dependencies]
4647
quote = "0.3.3"

html5ever/examples/arena.rs

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
// Copyright 2014-2017 The html5ever Project Developers. See the
2+
// COPYRIGHT file at the top-level directory of this distribution.
3+
//
4+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7+
// option. This file may not be copied, modified, or distributed
8+
// except according to those terms.
9+
10+
extern crate html5ever;
11+
extern crate typed_arena;
12+
13+
use html5ever::{parse_document, QualName, Attribute, ExpandedName};
14+
use html5ever::tendril::{TendrilSink, StrTendril};
15+
use html5ever::interface::tree_builder::{TreeSink, QuirksMode, NodeOrText, ElementFlags};
16+
use std::borrow::Cow;
17+
use std::cell::{Cell, RefCell};
18+
use std::collections::HashSet;
19+
use std::io::{self, Read};
20+
use std::ptr;
21+
22+
/// Reads an HTML document from standard input and parses it into a tree
/// whose nodes are allocated in a `typed_arena::Arena`.
///
/// The parsed document handle is not used further; the example only
/// demonstrates building the tree.
///
/// # Panics
///
/// Panics if standard input cannot be read to the end.
fn main() {
    let mut bytes = Vec::new();
    // Reading stdin is the only fallible step here; give the panic a message
    // that names it instead of an anonymous `unwrap` failure.
    io::stdin()
        .read_to_end(&mut bytes)
        .expect("failed to read the HTML document from stdin");
    let arena = typed_arena::Arena::new();
    html5ever_parse_slice_into_arena(&bytes, &arena);
}
28+
29+
fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> {
30+
let sink = Sink {
31+
arena: arena,
32+
document: arena.alloc(Node::new(NodeData::Document)),
33+
quirks_mode: QuirksMode::NoQuirks,
34+
};
35+
parse_document(sink, Default::default()).from_utf8().one(bytes)
36+
}
37+
38+
/// Shared reference to the `typed_arena::Arena` in which the `Node`s of a tree are allocated.
type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>;
39+
40+
/// Shared reference to a `Node`, valid for the `'arena` lifetime; `Sink` uses it as its `TreeSink::Handle`.
type Ref<'arena> = &'arena Node<'arena>;
41+
42+
/// Optional reference to another node, wrapped in a `Cell` so it can be changed through `&Node`; the type of every tree link in `Node`.
type Link<'arena> = Cell<Option<Ref<'arena>>>;
43+
44+
struct Sink<'arena> {
45+
arena: Arena<'arena>,
46+
document: Ref<'arena>,
47+
quirks_mode: QuirksMode,
48+
}
49+
50+
pub struct Node<'arena> {
51+
parent: Link<'arena>,
52+
next_sibling: Link<'arena>,
53+
previous_sibling: Link<'arena>,
54+
first_child: Link<'arena>,
55+
last_child: Link<'arena>,
56+
data: NodeData<'arena>,
57+
}
58+
59+
pub enum NodeData<'arena> {
60+
Document,
61+
Doctype {
62+
name: StrTendril,
63+
public_id: StrTendril,
64+
system_id: StrTendril,
65+
},
66+
Text {
67+
contents: RefCell<StrTendril>,
68+
},
69+
Comment {
70+
contents: StrTendril,
71+
},
72+
Element {
73+
name: QualName,
74+
attrs: RefCell<Vec<Attribute>>,
75+
template_contents: Option<Ref<'arena>>,
76+
mathml_annotation_xml_integration_point: bool,
77+
},
78+
ProcessingInstruction {
79+
target: StrTendril,
80+
contents: StrTendril,
81+
},
82+
}
83+
84+
impl<'arena> Node<'arena> {
85+
fn new(data: NodeData<'arena>) -> Self {
86+
Node {
87+
parent: Cell::new(None),
88+
previous_sibling: Cell::new(None),
89+
next_sibling: Cell::new(None),
90+
first_child: Cell::new(None),
91+
last_child: Cell::new(None),
92+
data: data,
93+
}
94+
}
95+
96+
fn detach(&self) {
97+
let parent = self.parent.take();
98+
let previous_sibling = self.previous_sibling.take();
99+
let next_sibling = self.next_sibling.take();
100+
101+
if let Some(next_sibling) = next_sibling {
102+
next_sibling.previous_sibling.set(previous_sibling);
103+
} else if let Some(parent) = parent {
104+
parent.last_child.set(previous_sibling);
105+
}
106+
107+
if let Some(previous_sibling) = previous_sibling {
108+
previous_sibling.next_sibling.set(next_sibling);
109+
} else if let Some(parent) = parent {
110+
parent.first_child.set(next_sibling);
111+
}
112+
}
113+
114+
fn append(&'arena self, new_child: &'arena Self) {
115+
new_child.detach();
116+
new_child.parent.set(Some(self));
117+
if let Some(last_child) = self.last_child.take() {
118+
new_child.previous_sibling.set(Some(last_child));
119+
debug_assert!(last_child.next_sibling.get().is_none());
120+
last_child.next_sibling.set(Some(new_child));
121+
} else {
122+
debug_assert!(self.first_child.get().is_none());
123+
self.first_child.set(Some(new_child));
124+
}
125+
self.last_child.set(Some(new_child));
126+
}
127+
128+
fn insert_before(&'arena self, new_sibling: &'arena Self) {
129+
new_sibling.detach();
130+
new_sibling.parent.set(self.parent.get());
131+
new_sibling.next_sibling.set(Some(self));
132+
if let Some(previous_sibling) = self.previous_sibling.take() {
133+
new_sibling.previous_sibling.set(Some(previous_sibling));
134+
debug_assert!(ptr::eq::<Node>(previous_sibling.next_sibling.get().unwrap(), self));
135+
previous_sibling.next_sibling.set(Some(new_sibling));
136+
} else if let Some(parent) = self.parent.get() {
137+
debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self));
138+
parent.first_child.set(Some(new_sibling));
139+
}
140+
self.previous_sibling.set(Some(new_sibling));
141+
}
142+
}
143+
144+
impl<'arena> Sink<'arena> {
145+
fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> {
146+
self.arena.alloc(Node::new(data))
147+
}
148+
149+
fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A)
150+
where P: FnOnce() -> Option<Ref<'arena>>,
151+
A: FnOnce(Ref<'arena>),
152+
{
153+
let new_node = match child {
154+
NodeOrText::AppendText(text) => {
155+
// Append to an existing Text node if we have one.
156+
if let Some(&Node { data: NodeData::Text { ref contents }, .. }) = previous() {
157+
contents.borrow_mut().push_tendril(&text);
158+
return
159+
}
160+
self.new_node(NodeData::Text { contents: RefCell::new(text) })
161+
}
162+
NodeOrText::AppendNode(node) => node
163+
};
164+
165+
append(new_node)
166+
}
167+
}
168+
169+
impl<'arena> TreeSink for Sink<'arena> {
170+
type Handle = Ref<'arena>;
171+
type Output = Ref<'arena>;
172+
173+
fn finish(self) -> Ref<'arena> {
174+
self.document
175+
}
176+
177+
fn parse_error(&mut self, _: Cow<'static, str>) {}
178+
179+
fn get_document(&mut self) -> Ref<'arena> {
180+
self.document
181+
}
182+
183+
fn set_quirks_mode(&mut self, mode: QuirksMode) {
184+
self.quirks_mode = mode;
185+
}
186+
187+
fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
188+
ptr::eq::<Node>(*x, *y)
189+
}
190+
191+
fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> {
192+
match target.data {
193+
NodeData::Element { ref name, .. } => name.expanded(),
194+
_ => panic!("not an element!"),
195+
}
196+
}
197+
198+
fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
199+
if let NodeData::Element { template_contents: Some(ref contents), .. } = target.data {
200+
contents
201+
} else {
202+
panic!("not a template element!")
203+
}
204+
}
205+
206+
fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool {
207+
if let NodeData::Element { mathml_annotation_xml_integration_point, .. } = target.data {
208+
mathml_annotation_xml_integration_point
209+
} else {
210+
panic!("not an element!")
211+
}
212+
}
213+
214+
fn has_parent_node(&self, node: &Ref<'arena>) -> bool {
215+
node.parent.get().is_some()
216+
}
217+
218+
fn create_element(&mut self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Ref<'arena> {
219+
self.new_node(NodeData::Element {
220+
name: name,
221+
attrs: RefCell::new(attrs),
222+
template_contents: if flags.template {
223+
Some(self.new_node(NodeData::Document))
224+
} else {
225+
None
226+
},
227+
mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
228+
229+
})
230+
}
231+
232+
fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> {
233+
self.new_node(NodeData::Comment { contents: text })
234+
}
235+
236+
fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
237+
self.new_node(NodeData::ProcessingInstruction { target: target, contents: data })
238+
}
239+
240+
fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
241+
self.append_common(
242+
child,
243+
|| parent.last_child.get(),
244+
|new_node| parent.append(new_node)
245+
)
246+
}
247+
248+
fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
249+
self.append_common(
250+
child,
251+
|| sibling.previous_sibling.get(),
252+
|new_node| sibling.insert_before(new_node)
253+
)
254+
}
255+
256+
fn append_based_on_parent_node(&mut self, element: &Ref<'arena>,
257+
prev_element: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
258+
if self.has_parent_node(element) {
259+
self.append_before_sibling(element, child)
260+
} else {
261+
self.append(prev_element, child)
262+
}
263+
}
264+
265+
fn append_doctype_to_document(&mut self,
266+
name: StrTendril,
267+
public_id: StrTendril,
268+
system_id: StrTendril) {
269+
self.document.append(self.new_node(NodeData::Doctype {
270+
name: name,
271+
public_id: public_id,
272+
system_id: system_id
273+
}))
274+
}
275+
276+
fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
277+
let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
278+
attrs.borrow_mut()
279+
} else {
280+
panic!("not an element")
281+
};
282+
283+
let existing_names = existing.iter().map(|e| e.name.clone()).collect::<HashSet<_>>();
284+
existing.extend(attrs.into_iter().filter(|attr| {
285+
!existing_names.contains(&attr.name)
286+
}));
287+
}
288+
289+
fn remove_from_parent(&mut self, target: &Ref<'arena>) {
290+
target.detach()
291+
}
292+
293+
fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
294+
let mut next_child = node.first_child.get();
295+
while let Some(child) = next_child {
296+
debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));
297+
next_child = child.next_sibling.get();
298+
new_parent.append(child)
299+
}
300+
}
301+
}

0 commit comments

Comments
 (0)