Skip to content

Commit 4b0d6b8

Browse files
committed
feat: implement query selectors, optimize performance, and fix benchmarks
- Implement select, selectAll, matches, closest, and isDefaultNamespace
- Optimize performance: enable LTO, use mimalloc, direct string parsing
- Fix clippy warnings and clean up dependencies
- Fix benchmarks and update README
- Add tests for new selector features
1 parent 6e302be commit 4b0d6b8

File tree

13 files changed

+257
-109
lines changed

13 files changed

+257
-109
lines changed

Cargo.lock

Lines changed: 42 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,8 @@ html5ever = "0.27.0"
88
indexmap = "2.2.6"
99

1010
[profile.release]
11-
lto = true
12-
strip = "symbols"
11+
codegen-units = 1
12+
lto = "fat"
13+
opt-level = 3
14+
panic = "abort"
15+
strip = "symbols"

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,6 @@ npm test
186186
## Benchmark
187187

188188
```bash
189-
cargo benchmark
190189
npm run benchmark
191190
```
192191

benchmark/domparser.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ export default function cheerio() {
1414

1515
bench.on("result", function (stat) {
1616
console.log(
17-
"niddle :" +
17+
"domparser-rs :" +
1818
stat.mean().toPrecision(6) +
1919
" ms/file ± " +
2020
stat.sd().toPrecision(6),

crates/domparser/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ name = "domparser"
44
version = "0.0.0"
55

66
[dependencies]
7-
html5ever = { workspace = true }
8-
indexmap = { workspace = true }
7+
html5ever = "0.27.0"
8+
indexmap = "2.2.6"
99
markup5ever = "0.11.0"
1010
markup5ever_rcdom = "0.3.0"
1111
tendril = "0.4.3"

crates/domparser/src/lib.rs

Lines changed: 5 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1+
use html5ever::parse_document;
12
use html5ever::tendril::TendrilSink;
2-
use html5ever::{local_name, namespace_url, ns, parse_document, QualName};
3-
use markup5ever_rcdom::{Node, NodeData, RcDom};
4-
use std::cell::RefCell;
5-
use std::rc::Rc;
3+
use markup5ever_rcdom::RcDom;
64

75
pub mod node;
86
pub mod serializer;
@@ -11,39 +9,9 @@ pub use markup5ever_rcdom;
119
pub use node::DomNode;
1210

1311
pub fn parse(html: String) -> DomNode {
14-
let dom = parse_document(RcDom::default(), Default::default())
15-
.from_utf8()
16-
.read_from(&mut html.as_bytes())
17-
.unwrap();
18-
19-
{
20-
let mut children = dom.document.children.borrow_mut();
21-
let has_html = children.iter().any(|c| {
22-
if let NodeData::Element { name, .. } = &c.data {
23-
name.local.as_ref() == "html"
24-
} else {
25-
false
26-
}
27-
});
28-
29-
if !has_html {
30-
let html_name = QualName::new(None, ns!(html), local_name!("html"));
31-
let html_node = Node::new(NodeData::Element {
32-
name: html_name,
33-
attrs: RefCell::new(vec![]),
34-
template_contents: RefCell::new(None),
35-
mathml_annotation_xml_integration_point: false,
36-
});
37-
38-
for child in children.drain(..) {
39-
html_node.children.borrow_mut().push(child.clone());
40-
child.parent.set(Some(Rc::downgrade(&html_node)));
41-
}
42-
43-
children.push(html_node.clone());
44-
html_node.parent.set(Some(Rc::downgrade(&dom.document)));
45-
}
46-
}
12+
let mut parser = parse_document(RcDom::default(), Default::default());
13+
parser.process(html.into());
14+
let dom = parser.finish();
4715

4816
DomNode(dom.document)
4917
}

crates/domparser/src/node/mod.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,10 @@ fn clone_node_data(data: &NodeData) -> NodeData {
7070
mathml_annotation_xml_integration_point,
7171
} => {
7272
let new_attrs = attrs.borrow().clone();
73-
let new_template_contents = if let Some(tc) = template_contents.borrow().as_ref() {
74-
Some(clone_handle_recursive(tc))
75-
} else {
76-
None
77-
};
73+
let new_template_contents = template_contents
74+
.borrow()
75+
.as_ref()
76+
.map(clone_handle_recursive);
7877
NodeData::Element {
7978
name: name.clone(),
8079
attrs: RefCell::new(new_attrs),

crates/domparser/src/node/properties.rs

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -222,11 +222,8 @@ impl DomNode {
222222

223223
pub fn set_node_value(&self, value: Option<String>) {
224224
if let Some(val) = value {
225-
match &self.0.data {
226-
NodeData::Text { contents } => {
227-
*contents.borrow_mut() = val.into();
228-
}
229-
_ => {}
225+
if let NodeData::Text { contents } = &self.0.data {
226+
*contents.borrow_mut() = val.into();
230227
}
231228
}
232229
}
@@ -286,12 +283,9 @@ impl DomNode {
286283
fn get_text(handle: &Handle) -> String {
287284
match &handle.data {
288285
NodeData::Text { contents } => contents.borrow().to_string(),
289-
NodeData::Element { .. } | NodeData::Document => handle
290-
.children
291-
.borrow()
292-
.iter()
293-
.map(|child| get_text(child))
294-
.collect(),
286+
NodeData::Element { .. } | NodeData::Document => {
287+
handle.children.borrow().iter().map(get_text).collect()
288+
}
295289
_ => "".to_string(),
296290
}
297291
}
@@ -541,8 +535,8 @@ impl DomNode {
541535
if let NodeData::Element { attrs, .. } = &self.0.data {
542536
for attr in attrs.borrow().iter() {
543537
let name = attr.name.local.to_string();
544-
if name.starts_with("data-") {
545-
let key = kebab_to_camel(&name[5..]);
538+
if let Some(stripped) = name.strip_prefix("data-") {
539+
let key = kebab_to_camel(stripped);
546540
map.insert(key, attr.value.to_string());
547541
}
548542
}
@@ -769,7 +763,7 @@ impl DomNode {
769763
}
770764

771765
let is_empty = if let NodeData::Text { contents } = &children[i].data {
772-
contents.borrow().len() == 0
766+
contents.borrow().is_empty()
773767
} else {
774768
false
775769
};
@@ -799,10 +793,8 @@ impl DomNode {
799793
{
800794
return Some(attr.value.to_string());
801795
}
802-
} else {
803-
if attr.name.local.as_ref() == "xmlns" && attr.name.prefix.is_none() {
804-
return Some(attr.value.to_string());
805-
}
796+
} else if attr.name.local.as_ref() == "xmlns" && attr.name.prefix.is_none() {
797+
return Some(attr.value.to_string());
806798
}
807799
}
808800
}
@@ -816,10 +808,10 @@ impl DomNode {
816808
while let Some(node) = current {
817809
if let NodeData::Element { attrs, .. } = &node.data {
818810
for attr in attrs.borrow().iter() {
819-
if attr.value.as_ref() == namespace {
820-
if attr.name.prefix.as_ref().map(|s| s.as_ref()) == Some("xmlns") {
821-
return Some(attr.name.local.to_string());
822-
}
811+
if attr.value.as_ref() == namespace
812+
&& attr.name.prefix.as_ref().map(|s| s.as_ref()) == Some("xmlns")
813+
{
814+
return Some(attr.name.local.to_string());
823815
}
824816
}
825817
}
@@ -891,9 +883,9 @@ impl DomNode {
891883
let pos2 = children.iter().position(|x| Rc::ptr_eq(x, child2)).unwrap();
892884

893885
if pos1 < pos2 {
894-
return 4;
886+
4
895887
} else {
896-
return 2;
888+
2
897889
}
898890
}
899891
}
@@ -904,13 +896,11 @@ fn kebab_to_camel(s: &str) -> String {
904896
for c in s.chars() {
905897
if c == '-' {
906898
next_upper = true;
899+
} else if next_upper {
900+
result.push(c.to_ascii_uppercase());
901+
next_upper = false;
907902
} else {
908-
if next_upper {
909-
result.push(c.to_ascii_uppercase());
910-
next_upper = false;
911-
} else {
912-
result.push(c);
913-
}
903+
result.push(c);
914904
}
915905
}
916906
result

0 commit comments

Comments
 (0)