Skip to content

Commit 121523b

Browse files
author
bors-servo
authored
Auto merge of #372 - servo:iterate, r=manishearth
Serialize rcdom iteratively Fixes #371.
2 parents 1b4e978 + 014b3ce commit 121523b

File tree

9 files changed

+133
-59
lines changed

9 files changed

+133
-59
lines changed

html5ever/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "html5ever"
4-
version = "0.22.7"
4+
version = "0.22.8"
55
authors = [ "The html5ever Project Developers" ]
66
license = "MIT / Apache-2.0"
77
repository = "https://github.com/servo/html5ever"
@@ -29,7 +29,7 @@ name = "serializer"
2929
[dependencies]
3030
log = "0.4"
3131
mac = "0.1"
32-
markup5ever = { version = "0.7", path = "../markup5ever" }
32+
markup5ever = { version = "0.8", path = "../markup5ever" }
3333

3434
[dev-dependencies]
3535
rustc-serialize = "0.3.15"

html5ever/examples/print-rcdom.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use html5ever::tendril::TendrilSink;
2121

2222
// This is not proper HTML serialization, of course.
2323

24-
fn walk(indent: usize, handle: Handle) {
24+
fn walk(indent: usize, handle: &Handle) {
2525
let node = handle;
2626
// FIXME: don't allocate
2727
print!("{}", repeat(" ").take(indent).collect::<String>());
@@ -58,7 +58,7 @@ fn walk(indent: usize, handle: Handle) {
5858
}
5959

6060
for child in node.children.borrow().iter() {
61-
walk(indent + 4, child.clone());
61+
walk(indent + 4, child);
6262
}
6363
}
6464

@@ -73,11 +73,11 @@ fn main() {
7373
.from_utf8()
7474
.read_from(&mut stdin.lock())
7575
.unwrap();
76-
walk(0, dom.document);
76+
walk(0, &dom.document);
7777

7878
if !dom.errors.is_empty() {
7979
println!("\nParse errors:");
80-
for err in dom.errors.into_iter() {
80+
for err in dom.errors.iter() {
8181
println!(" {}", err);
8282
}
8383
}

html5ever/tests/serializer.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,3 +245,20 @@ fn doctype() {
245245
serialize(&mut result, &dom.document, Default::default()).unwrap();
246246
assert_eq!(String::from_utf8(result).unwrap(), "<!DOCTYPE html>");
247247
}
248+
249+
#[test]
250+
fn deep_tree() {
251+
let parser = parse_fragment(
252+
RcDom::default(),
253+
ParseOpts::default(),
254+
QualName::new(None, ns!(html), local_name!("div")),
255+
vec![],
256+
);
257+
let src = String::from("<b>".repeat(60_000));
258+
let dom = parser.one(src);
259+
let document = &dom.document;
260+
let opts = SerializeOpts::default();
261+
let mut ret_val = Vec::new();
262+
serialize(&mut ret_val, document, opts)
263+
.expect("Writing to a string shouldn't fail (expect on OOM)");
264+
}

html5ever/tests/tree_builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ fn make_test_desc_with_scripting_flag(
228228
.one(data.clone());
229229
// fragment case: serialize children of the html element
230230
// rather than children of the document
231-
let doc = dom.document;
231+
let doc = &dom.document;
232232
let root = &doc.children.borrow()[0];
233233
for child in root.children.borrow().iter() {
234234
serialize(&mut result, 1, child.clone());

markup5ever/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "markup5ever"
3-
version = "0.7.7"
3+
version = "0.8.0"
44
authors = [ "The html5ever Project Developers" ]
55
license = "MIT / Apache-2.0"
66
repository = "https://github.com/servo/html5ever"
@@ -16,6 +16,7 @@ path = "lib.rs"
1616
string_cache = "0.7"
1717
phf = "0.7"
1818
tendril = "0.4"
19+
log = "0.4"
1920

2021
[build-dependencies]
2122
string_cache_codegen = "0.4"

markup5ever/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
// option. This file may not be copied, modified, or distributed
88
// except according to those terms.
99

10+
#[macro_use]
11+
extern crate log;
1012
extern crate phf;
1113
extern crate string_cache;
1214
pub extern crate tendril;

markup5ever/rcdom.rs

Lines changed: 100 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ pub struct Node {
109109
pub children: RefCell<Vec<Handle>>,
110110
/// Represents this node's data.
111111
pub data: NodeData,
112+
/// Flag to control whether to free any children on destruction.
113+
leak_children_on_drop: Cell<bool>,
112114
}
113115

114116
impl Node {
@@ -118,8 +120,38 @@ impl Node {
118120
data: data,
119121
parent: Cell::new(None),
120122
children: RefCell::new(Vec::new()),
123+
leak_children_on_drop: Cell::new(true),
121124
})
122125
}
126+
127+
/// Drop any child nodes remaining in this node at destruction.
128+
///
129+
/// RcDom's destructor automatically drops any nodes and children that are
130+
/// present in the document. This setting only affects nodes that are dropped
131+
/// by manipulating the tree before RcDom's destructor runs (such as manually
132+
/// removing children from a node after parsing is complete).
133+
///
134+
/// Unsafety: due to the representation of children, this can trigger
135+
/// stack overflow if dropping a node with a very deep tree of children.
136+
/// This is not a recommended configuration to use when interacting with
137+
/// arbitrary HTML content.
138+
pub unsafe fn free_child_nodes_on_drop(&self) {
139+
self.leak_children_on_drop.set(false);
140+
}
141+
}
142+
143+
impl Drop for Node {
144+
fn drop(&mut self) {
145+
if !self.children.borrow().is_empty() {
146+
if self.leak_children_on_drop.get() {
147+
warn!("Dropping node with children outside of RcDom's destructor. \
148+
Leaking memory for {} children.", self.children.borrow().len());
149+
for child in mem::replace(&mut *self.children.borrow_mut(), vec![]) {
150+
mem::forget(child);
151+
}
152+
}
153+
}
154+
}
123155
}
124156

125157
impl fmt::Debug for Node {
@@ -195,6 +227,18 @@ pub struct RcDom {
195227
pub quirks_mode: QuirksMode,
196228
}
197229

230+
impl Drop for RcDom {
231+
fn drop(&mut self) {
232+
// Ensure that node destructors execute linearly, rather
233+
// than recursing through a tree of arbitrary depth.
234+
let mut to_be_processed = vec![self.document.clone()];
235+
while let Some(node) = to_be_processed.pop() {
236+
to_be_processed.extend_from_slice(&*node.children.borrow());
237+
node.children.borrow_mut().clear();
238+
}
239+
}
240+
}
241+
198242
impl TreeSink for RcDom {
199243
type Output = Self;
200244
fn finish(self) -> Self {
@@ -418,61 +462,71 @@ impl Default for RcDom {
418462
}
419463
}
420464

465+
enum SerializeOp {
466+
Open(Handle),
467+
Close(QualName)
468+
}
469+
421470
impl Serialize for Handle {
422471
fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
423472
where
424473
S: Serializer,
425474
{
426-
match (&traversal_scope, &self.data) {
427-
(
428-
_,
429-
&NodeData::Element {
430-
ref name,
431-
ref attrs,
432-
..
433-
},
434-
) => {
435-
if traversal_scope == IncludeNode {
436-
try!(serializer.start_elem(
437-
name.clone(),
438-
attrs.borrow().iter().map(|at| (&at.name, &at.value[..]))
439-
));
440-
}
441-
442-
for handle in self.children.borrow().iter() {
443-
try!(handle.clone().serialize(serializer, IncludeNode));
444-
}
475+
let mut ops = match traversal_scope {
476+
IncludeNode => vec![SerializeOp::Open(self.clone())],
477+
ChildrenOnly(_) => self
478+
.children
479+
.borrow()
480+
.iter()
481+
.map(|h| SerializeOp::Open(h.clone())).collect(),
482+
};
445483

446-
if traversal_scope == IncludeNode {
447-
try!(serializer.end_elem(name.clone()));
484+
while !ops.is_empty() {
485+
match ops.remove(0) {
486+
SerializeOp::Open(handle) => {
487+
match &handle.data {
488+
&NodeData::Element {
489+
ref name,
490+
ref attrs,
491+
..
492+
} => {
493+
try!(serializer.start_elem(
494+
name.clone(),
495+
attrs.borrow().iter().map(|at| (&at.name, &at.value[..]))
496+
));
497+
498+
ops.insert(0, SerializeOp::Close(name.clone()));
499+
500+
for child in handle.children.borrow().iter().rev() {
501+
ops.insert(0, SerializeOp::Open(child.clone()));
502+
}
503+
}
504+
505+
&NodeData::Doctype { ref name, .. } => serializer.write_doctype(&name)?,
506+
507+
&NodeData::Text { ref contents } => {
508+
serializer.write_text(&contents.borrow())?
509+
}
510+
511+
&NodeData::Comment { ref contents } => {
512+
serializer.write_comment(&contents)?
513+
},
514+
515+
&NodeData::ProcessingInstruction {
516+
ref target,
517+
ref contents,
518+
} => serializer.write_processing_instruction(target, contents)?,
519+
520+
&NodeData::Document => panic!("Can't serialize Document node itself"),
521+
}
448522
}
449-
Ok(())
450-
},
451523

452-
(&ChildrenOnly(_), &NodeData::Document) => {
453-
for handle in self.children.borrow().iter() {
454-
try!(handle.clone().serialize(serializer, IncludeNode));
524+
SerializeOp::Close(name) => {
525+
try!(serializer.end_elem(name));
455526
}
456-
Ok(())
457-
},
458-
459-
(&ChildrenOnly(_), _) => Ok(()),
460-
461-
(&IncludeNode, &NodeData::Doctype { ref name, .. }) => serializer.write_doctype(&name),
462-
(&IncludeNode, &NodeData::Text { ref contents }) => {
463-
serializer.write_text(&contents.borrow())
464-
},
465-
(&IncludeNode, &NodeData::Comment { ref contents }) => {
466-
serializer.write_comment(&contents)
467-
},
468-
(
469-
&IncludeNode,
470-
&NodeData::ProcessingInstruction {
471-
ref target,
472-
ref contents,
473-
},
474-
) => serializer.write_processing_instruction(target, contents),
475-
(&IncludeNode, &NodeData::Document) => panic!("Can't serialize Document node itself"),
527+
}
476528
}
529+
530+
Ok(())
477531
}
478532
}

xml5ever/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "xml5ever"
4-
version = "0.13.1"
4+
version = "0.13.2"
55
authors = ["The xml5ever project developers"]
66
license = "MIT / Apache-2.0"
77
repository = "https://github.com/servo/html5ever"
@@ -23,7 +23,7 @@ doctest = true
2323
time = "0.1"
2424
log = "0.4"
2525
mac = "0.1"
26-
markup5ever = {version = "0.7", path = "../markup5ever" }
26+
markup5ever = {version = "0.8", path = "../markup5ever" }
2727

2828
[dev-dependencies]
2929
rustc-serialize = "0.3.15"

xml5ever/examples/xml_tree_printer.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use xml5ever::driver::parse_document;
1818
use xml5ever::rcdom::{Handle, NodeData, RcDom};
1919
use xml5ever::tendril::TendrilSink;
2020

21-
fn walk(prefix: &str, handle: Handle) {
21+
fn walk(prefix: &str, handle: &Handle) {
2222
let node = handle;
2323

2424
print!("{}", prefix);
@@ -50,7 +50,7 @@ fn walk(prefix: &str, handle: Handle) {
5050
_ => false,
5151
})
5252
{
53-
walk(&new_indent, child.clone());
53+
walk(&new_indent, child);
5454
}
5555
}
5656

@@ -69,5 +69,5 @@ fn main() {
6969
.unwrap();;
7070

7171
// Execute our visualizer on RcDom
72-
walk("", dom.document);
72+
walk("", &dom.document);
7373
}

0 commit comments

Comments
 (0)