|
| 1 | +// Copyright 2014-2017 The html5ever Project Developers. See the |
| 2 | +// COPYRIGHT file at the top-level directory of this distribution. |
| 3 | +// |
| 4 | +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 5 | +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 6 | +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 7 | +// option. This file may not be copied, modified, or distributed |
| 8 | +// except according to those terms. |
| 9 | + |
| 10 | +extern crate html5ever; |
| 11 | +extern crate typed_arena; |
| 12 | + |
| 13 | +use html5ever::{parse_document, QualName, Attribute, ExpandedName}; |
| 14 | +use html5ever::tendril::{TendrilSink, StrTendril}; |
| 15 | +use html5ever::interface::tree_builder::{TreeSink, QuirksMode, NodeOrText, ElementFlags}; |
| 16 | +use std::borrow::Cow; |
| 17 | +use std::cell::{Cell, RefCell}; |
| 18 | +use std::collections::HashSet; |
| 19 | +use std::io::{self, Read}; |
| 20 | +use std::ptr; |
| 21 | + |
| 22 | +fn main() { |
| 23 | + let mut bytes = Vec::new(); |
| 24 | + io::stdin().read_to_end(&mut bytes).unwrap(); |
| 25 | + let arena = typed_arena::Arena::new(); |
| 26 | + html5ever_parse_slice_into_arena(&bytes, &arena); |
| 27 | +} |
| 28 | + |
| 29 | +fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> { |
| 30 | + let sink = Sink { |
| 31 | + arena: arena, |
| 32 | + document: arena.alloc(Node::new(NodeData::Document)), |
| 33 | + quirks_mode: QuirksMode::NoQuirks, |
| 34 | + }; |
| 35 | + parse_document(sink, Default::default()).from_utf8().one(bytes) |
| 36 | +} |
| 37 | + |
| 38 | +type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>; |
| 39 | + |
| 40 | +type Ref<'arena> = &'arena Node<'arena>; |
| 41 | + |
| 42 | +type Link<'arena> = Cell<Option<Ref<'arena>>>; |
| 43 | + |
| 44 | +struct Sink<'arena> { |
| 45 | + arena: Arena<'arena>, |
| 46 | + document: Ref<'arena>, |
| 47 | + quirks_mode: QuirksMode, |
| 48 | +} |
| 49 | + |
| 50 | +pub struct Node<'arena> { |
| 51 | + parent: Link<'arena>, |
| 52 | + next_sibling: Link<'arena>, |
| 53 | + previous_sibling: Link<'arena>, |
| 54 | + first_child: Link<'arena>, |
| 55 | + last_child: Link<'arena>, |
| 56 | + data: NodeData<'arena>, |
| 57 | +} |
| 58 | + |
| 59 | +pub enum NodeData<'arena> { |
| 60 | + Document, |
| 61 | + Doctype { |
| 62 | + name: StrTendril, |
| 63 | + public_id: StrTendril, |
| 64 | + system_id: StrTendril, |
| 65 | + }, |
| 66 | + Text { |
| 67 | + contents: RefCell<StrTendril>, |
| 68 | + }, |
| 69 | + Comment { |
| 70 | + contents: StrTendril, |
| 71 | + }, |
| 72 | + Element { |
| 73 | + name: QualName, |
| 74 | + attrs: RefCell<Vec<Attribute>>, |
| 75 | + template_contents: Option<Ref<'arena>>, |
| 76 | + mathml_annotation_xml_integration_point: bool, |
| 77 | + }, |
| 78 | + ProcessingInstruction { |
| 79 | + target: StrTendril, |
| 80 | + contents: StrTendril, |
| 81 | + }, |
| 82 | +} |
| 83 | + |
| 84 | +impl<'arena> Node<'arena> { |
| 85 | + fn new(data: NodeData<'arena>) -> Self { |
| 86 | + Node { |
| 87 | + parent: Cell::new(None), |
| 88 | + previous_sibling: Cell::new(None), |
| 89 | + next_sibling: Cell::new(None), |
| 90 | + first_child: Cell::new(None), |
| 91 | + last_child: Cell::new(None), |
| 92 | + data: data, |
| 93 | + } |
| 94 | + } |
| 95 | + |
| 96 | + fn detach(&self) { |
| 97 | + let parent = self.parent.take(); |
| 98 | + let previous_sibling = self.previous_sibling.take(); |
| 99 | + let next_sibling = self.next_sibling.take(); |
| 100 | + |
| 101 | + if let Some(next_sibling) = next_sibling { |
| 102 | + next_sibling.previous_sibling.set(previous_sibling); |
| 103 | + } else if let Some(parent) = parent { |
| 104 | + parent.last_child.set(previous_sibling); |
| 105 | + } |
| 106 | + |
| 107 | + if let Some(previous_sibling) = previous_sibling { |
| 108 | + previous_sibling.next_sibling.set(next_sibling); |
| 109 | + } else if let Some(parent) = parent { |
| 110 | + parent.first_child.set(next_sibling); |
| 111 | + } |
| 112 | + } |
| 113 | + |
| 114 | + fn append(&'arena self, new_child: &'arena Self) { |
| 115 | + new_child.detach(); |
| 116 | + new_child.parent.set(Some(self)); |
| 117 | + if let Some(last_child) = self.last_child.take() { |
| 118 | + new_child.previous_sibling.set(Some(last_child)); |
| 119 | + debug_assert!(last_child.next_sibling.get().is_none()); |
| 120 | + last_child.next_sibling.set(Some(new_child)); |
| 121 | + } else { |
| 122 | + debug_assert!(self.first_child.get().is_none()); |
| 123 | + self.first_child.set(Some(new_child)); |
| 124 | + } |
| 125 | + self.last_child.set(Some(new_child)); |
| 126 | + } |
| 127 | + |
| 128 | + fn insert_before(&'arena self, new_sibling: &'arena Self) { |
| 129 | + new_sibling.detach(); |
| 130 | + new_sibling.parent.set(self.parent.get()); |
| 131 | + new_sibling.next_sibling.set(Some(self)); |
| 132 | + if let Some(previous_sibling) = self.previous_sibling.take() { |
| 133 | + new_sibling.previous_sibling.set(Some(previous_sibling)); |
| 134 | + debug_assert!(ptr::eq::<Node>(previous_sibling.next_sibling.get().unwrap(), self)); |
| 135 | + previous_sibling.next_sibling.set(Some(new_sibling)); |
| 136 | + } else if let Some(parent) = self.parent.get() { |
| 137 | + debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self)); |
| 138 | + parent.first_child.set(Some(new_sibling)); |
| 139 | + } |
| 140 | + self.previous_sibling.set(Some(new_sibling)); |
| 141 | + } |
| 142 | +} |
| 143 | + |
| 144 | +impl<'arena> Sink<'arena> { |
| 145 | + fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> { |
| 146 | + self.arena.alloc(Node::new(data)) |
| 147 | + } |
| 148 | + |
| 149 | + fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A) |
| 150 | + where P: FnOnce() -> Option<Ref<'arena>>, |
| 151 | + A: FnOnce(Ref<'arena>), |
| 152 | + { |
| 153 | + let new_node = match child { |
| 154 | + NodeOrText::AppendText(text) => { |
| 155 | + // Append to an existing Text node if we have one. |
| 156 | + if let Some(&Node { data: NodeData::Text { ref contents }, .. }) = previous() { |
| 157 | + contents.borrow_mut().push_tendril(&text); |
| 158 | + return |
| 159 | + } |
| 160 | + self.new_node(NodeData::Text { contents: RefCell::new(text) }) |
| 161 | + } |
| 162 | + NodeOrText::AppendNode(node) => node |
| 163 | + }; |
| 164 | + |
| 165 | + append(new_node) |
| 166 | + } |
| 167 | +} |
| 168 | + |
| 169 | +impl<'arena> TreeSink for Sink<'arena> { |
| 170 | + type Handle = Ref<'arena>; |
| 171 | + type Output = Ref<'arena>; |
| 172 | + |
| 173 | + fn finish(self) -> Ref<'arena> { |
| 174 | + self.document |
| 175 | + } |
| 176 | + |
| 177 | + fn parse_error(&mut self, _: Cow<'static, str>) {} |
| 178 | + |
| 179 | + fn get_document(&mut self) -> Ref<'arena> { |
| 180 | + self.document |
| 181 | + } |
| 182 | + |
| 183 | + fn set_quirks_mode(&mut self, mode: QuirksMode) { |
| 184 | + self.quirks_mode = mode; |
| 185 | + } |
| 186 | + |
| 187 | + fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool { |
| 188 | + ptr::eq::<Node>(*x, *y) |
| 189 | + } |
| 190 | + |
| 191 | + fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> { |
| 192 | + match target.data { |
| 193 | + NodeData::Element { ref name, .. } => name.expanded(), |
| 194 | + _ => panic!("not an element!"), |
| 195 | + } |
| 196 | + } |
| 197 | + |
| 198 | + fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> { |
| 199 | + if let NodeData::Element { template_contents: Some(ref contents), .. } = target.data { |
| 200 | + contents |
| 201 | + } else { |
| 202 | + panic!("not a template element!") |
| 203 | + } |
| 204 | + } |
| 205 | + |
| 206 | + fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool { |
| 207 | + if let NodeData::Element { mathml_annotation_xml_integration_point, .. } = target.data { |
| 208 | + mathml_annotation_xml_integration_point |
| 209 | + } else { |
| 210 | + panic!("not an element!") |
| 211 | + } |
| 212 | + } |
| 213 | + |
| 214 | + fn has_parent_node(&self, node: &Ref<'arena>) -> bool { |
| 215 | + node.parent.get().is_some() |
| 216 | + } |
| 217 | + |
| 218 | + fn create_element(&mut self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Ref<'arena> { |
| 219 | + self.new_node(NodeData::Element { |
| 220 | + name: name, |
| 221 | + attrs: RefCell::new(attrs), |
| 222 | + template_contents: if flags.template { |
| 223 | + Some(self.new_node(NodeData::Document)) |
| 224 | + } else { |
| 225 | + None |
| 226 | + }, |
| 227 | + mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, |
| 228 | + |
| 229 | + }) |
| 230 | + } |
| 231 | + |
| 232 | + fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> { |
| 233 | + self.new_node(NodeData::Comment { contents: text }) |
| 234 | + } |
| 235 | + |
| 236 | + fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> { |
| 237 | + self.new_node(NodeData::ProcessingInstruction { target: target, contents: data }) |
| 238 | + } |
| 239 | + |
| 240 | + fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { |
| 241 | + self.append_common( |
| 242 | + child, |
| 243 | + || parent.last_child.get(), |
| 244 | + |new_node| parent.append(new_node) |
| 245 | + ) |
| 246 | + } |
| 247 | + |
| 248 | + fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { |
| 249 | + self.append_common( |
| 250 | + child, |
| 251 | + || sibling.previous_sibling.get(), |
| 252 | + |new_node| sibling.insert_before(new_node) |
| 253 | + ) |
| 254 | + } |
| 255 | + |
| 256 | + fn append_based_on_parent_node(&mut self, element: &Ref<'arena>, |
| 257 | + prev_element: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { |
| 258 | + if self.has_parent_node(element) { |
| 259 | + self.append_before_sibling(element, child) |
| 260 | + } else { |
| 261 | + self.append(prev_element, child) |
| 262 | + } |
| 263 | + } |
| 264 | + |
| 265 | + fn append_doctype_to_document(&mut self, |
| 266 | + name: StrTendril, |
| 267 | + public_id: StrTendril, |
| 268 | + system_id: StrTendril) { |
| 269 | + self.document.append(self.new_node(NodeData::Doctype { |
| 270 | + name: name, |
| 271 | + public_id: public_id, |
| 272 | + system_id: system_id |
| 273 | + })) |
| 274 | + } |
| 275 | + |
| 276 | + fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) { |
| 277 | + let mut existing = if let NodeData::Element { ref attrs, .. } = target.data { |
| 278 | + attrs.borrow_mut() |
| 279 | + } else { |
| 280 | + panic!("not an element") |
| 281 | + }; |
| 282 | + |
| 283 | + let existing_names = existing.iter().map(|e| e.name.clone()).collect::<HashSet<_>>(); |
| 284 | + existing.extend(attrs.into_iter().filter(|attr| { |
| 285 | + !existing_names.contains(&attr.name) |
| 286 | + })); |
| 287 | + } |
| 288 | + |
| 289 | + fn remove_from_parent(&mut self, target: &Ref<'arena>) { |
| 290 | + target.detach() |
| 291 | + } |
| 292 | + |
| 293 | + fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) { |
| 294 | + let mut next_child = node.first_child.get(); |
| 295 | + while let Some(child) = next_child { |
| 296 | + debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node)); |
| 297 | + next_child = child.next_sibling.get(); |
| 298 | + new_parent.append(child) |
| 299 | + } |
| 300 | + } |
| 301 | +} |
0 commit comments