Skip to content

Commit 41c872e

Browse files
committed
Merge branch 'release/addHasPseudo'
2 parents 268a1f1 + ea5591f commit 41c872e

File tree

8 files changed

+127
-48
lines changed

8 files changed

+127
-48
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
0.2.1 版本后各个接口方法已经基本趋于稳定,后面将不会做大的调整。
44

5+
## [0.5.2] - 2022-04-26
6+
7+
### 增加
8+
9+
- 增加 `:has` 伪类选择器,以配合 `:not` 伪类能组成 `:not(:has(p))` 等实现 `has` 反选逻辑。
510
## [0.5.1] - 2022-02-22
611

712
### 修改

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "visdom"
3-
version = "0.5.1"
3+
version = "0.5.2"
44
edition = "2018"
55
description = "A html document syntax and operation library, use APIs similar to jquery, easy to use for web scraping and confused html."
66
keywords = ["html", "scrape", "jquery", "query", "selector"]

src/main.rs

Lines changed: 61 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,30 @@ fn main() -> Result<(), BoxDynError> {
1212
// let ul = root.find("ul");
1313
// let nth_2n_child = ul.find(":nth-child(2n),:nth-child(1),:nth-child(n+8)");
1414
// println!("2n:{}", nth_2n_child.length());
15-
let html = r##"
16-
<!doctype html>
17-
<html lang="utf-8">
18-
<head></head>
19-
<body>
20-
<div id="id" name="#id">
21-
<div class="class" name="#id .class">class-div</div>
22-
<p>
23-
p-tag
24-
</p>
25-
</div>
26-
<div id="nested" name="#nested">
27-
<div class="outer-div-1" name="#nested .outer-div-1">
28-
<div class="inner-div-1-1" name="#nested .outer-div-1 .inner-div-1-1">inner-div-1-1</div>
29-
<div class="inner-div-1-2" name="#nested .outer-div-1 .inner-div-1-2">inner-div-<span>1</span>-<span>2</span></div>
30-
</div>
31-
<div class="outer-div-2" name="#nested .outer-div-2">
32-
<div class="inner-div-2-1" name="#nested .outer-div-2 .inner-div-2-1"></div>
33-
<div class="inner-div-2-2" name="#nested .outer-div-2 .inner-div-2-1"></div>
34-
</div>
35-
</div>
36-
</body>
37-
</html>
38-
"##;
15+
// let html = r##"
16+
// <!doctype html>
17+
// <html lang="utf-8">
18+
// <head></head>
19+
// <body>
20+
// <div id="id" name="#id">
21+
// <div class="class" name="#id .class">class-div</div>
22+
// <p>
23+
// p-tag
24+
// </p>
25+
// </div>
26+
// <div id="nested" name="#nested">
27+
// <div class="outer-div-1" name="#nested .outer-div-1">
28+
// <div class="inner-div-1-1" name="#nested .outer-div-1 .inner-div-1-1">inner-div-1-1</div>
29+
// <div class="inner-div-1-2" name="#nested .outer-div-1 .inner-div-1-2">inner-div-<span>1</span>-<span>2</span></div>
30+
// </div>
31+
// <div class="outer-div-2" name="#nested .outer-div-2">
32+
// <div class="inner-div-2-1" name="#nested .outer-div-2 .inner-div-2-1"></div>
33+
// <div class="inner-div-2-2" name="#nested .outer-div-2 .inner-div-2-1"></div>
34+
// </div>
35+
// </div>
36+
// </body>
37+
// </html>
38+
// "##;
3939
// let div = root.find("div");
4040
// println!("div:{}", div.length());
4141
// let child_divs = div.find("div:nth-child(1)");
@@ -248,32 +248,50 @@ fn main() -> Result<(), BoxDynError> {
248248
// true
249249
// });
250250
// println!("{}", root.outer_html());
251+
// let html = r#"
252+
// <!doctype html>
253+
// <html>
254+
// <body>
255+
// <dl>
256+
// <dt>Title</dt>
257+
// <dd><span>item1</span></dd>
258+
// <dd class="item2"><span>item2</span></dd>
259+
// <dd class="item3">item3</dd>
260+
// </dl>
261+
262+
// </body>
263+
// </html>
264+
// "#;
265+
// let root = Vis::load(html)?;
266+
// let items = root.find("dl > *");
267+
// println!(
268+
// "items:{}, items:hasnotspan:{}",
269+
// items.length(),
270+
// items.has(":not(span)").length()
271+
// );
272+
// first.map(|_, ele| {
273+
// println!("{:?}", ele.tag_name());
274+
// });
275+
// println!("{}", first.get(0).unwrap().tag_name());
276+
// println!("root:{}", root.find("div").length());
277+
// println!("root:{:?}", root.find("#content").length());
251278
let html = r#"
252279
<!doctype html>
253280
<html>
254281
<body>
255-
<dl>
256-
<dt>Title</dt>
257-
<dd><span>item1</span></dd>
258-
<dd class="item2"><span>item2</span></dd>
259-
<dd class="item3">item3</dd>
260-
</dl>
261-
282+
<div>22<p></p></div>
283+
<div>1</div>
284+
<div>1</div>
262285
</body>
263286
</html>
264287
"#;
265288
let root = Vis::load(html)?;
266-
let items = root.find("dl > *");
267-
println!(
268-
"items:{}, items:hasnotspan:{}",
269-
items.length(),
270-
items.has(":not(span)").length()
271-
);
272-
// first.map(|_, ele| {
273-
// println!("{:?}", ele.tag_name());
274-
// });
275-
// println!("{}", first.get(0).unwrap().tag_name());
276-
// println!("root:{}", root.find("div").length());
277-
// println!("root:{:?}", root.find("#content").length());
289+
let div_no_has_p = root.find("div:not(:has(p))");
290+
println!("div_no_has_p: {}", div_no_has_p.text());
291+
let divs = root.find("div");
292+
let div_has_p = divs.has("p");
293+
println!("div_has_p: {}", div_has_p.text());
294+
let div_no_has_p = divs.not(":has(p)");
295+
println!("div_no_has_p: {}", div_no_has_p.text());
278296
Ok(())
279297
}

src/mesdoc/rules/pseudo.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ fn get_allowed_name_ele(
469469

470470
// collect available elements from siblings
471471
fn collect_avail_name_eles(
472-
node_indexs: &mut Vec<usize>,
472+
node_indexs: &mut [usize],
473473
siblings: &[BoxDynElement],
474474
finded: &mut Vec<BoxDynElement>,
475475
) {
@@ -894,6 +894,25 @@ fn pseudo_contains(rules: &mut Vec<RuleItem>) {
894894

895895
// -----------jquery selectors----------
896896

897+
/// pseudo selector: `:has(selector)`
///
/// Pushes a rule named `:has` onto `rules`. The rule's matcher sets only
/// `all_handle` (every other `Matcher` field comes from `Default`); that handler
/// collects the characters of `data[2]` into a `String` and passes it to
/// `Elements::has` on the incoming element set.
/// NOTE(review): `data[2]` is presumably the `{selector}` capture of the
/// pattern `:has({spaces}{selector}{spaces})` — confirm against the rule parser.
898+
fn pseudo_has(rules: &mut Vec<RuleItem>) {
899+
let name = ":has";
900+
let selector = ":has({spaces}{selector}{spaces})";
901+
let rule = RuleDefItem(
902+
name,
903+
selector,
904+
PRIORITY,
905+
Box::new(|data: MatchedQueue| {
906+
let selector = data[2].chars.iter().collect::<String>();
907+
Matcher {
908+
all_handle: Some(Box::new(move |eles: &Elements, _| eles.has(&selector))),
909+
..Default::default()
910+
}
911+
}),
912+
);
913+
rules.push(rule.into());
914+
}
915+
897916
/// pseudo selector: `:checked`
898917
fn pseudo_checked(rules: &mut Vec<RuleItem>) {
899918
let selector = ":checked";
@@ -1055,6 +1074,8 @@ pub fn init(rules: &mut Vec<RuleItem>) {
10551074
// :contains
10561075
pseudo_contains(rules);
10571076
// ---- jquery selectors -----
1077+
// :has
1078+
pseudo_has(rules);
10581079
// :checked
10591080
pseudo_checked(rules);
10601081
// :header alias

src/mesdoc/selector/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ impl Selector {
354354
if !finded {
355355
if level == 0 {
356356
matched.push(Matched {
357-
chars: chars[0..index].iter().copied().collect(),
357+
chars: chars[0..index].to_vec(),
358358
name: "selector",
359359
..Default::default()
360360
});

src/mesdoc/selector/pattern.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ impl Pattern for Nth {
246246
impl Nth {
247247
fn get_number(data: &MatchedData, keys: (&str, &str), def: Option<&str>) -> Option<&'static str> {
248248
const MINUS: &str = "-";
249-
if let Some(&idx) = data.get(keys.0).or_else(|| def.as_ref()) {
249+
if let Some(&idx) = data.get(keys.0).or(def.as_ref()) {
250250
let mut index = idx.to_owned();
251251
if let Some(&op) = data.get(keys.1) {
252252
if op == MINUS {

tests/attr.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ fn test_normal_attr() -> Result {
4747
input.set_attr("title", Some(title));
4848
assert_eq!(
4949
input.attr("title").unwrap().to_string(),
50-
title.replace("'", "&apos;")
50+
title.replace('\'', "&apos;")
5151
);
5252
Ok(())
5353
}

tests/selector.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,41 @@ fn test_selector_pseudo_not() -> Result {
825825
Ok(())
826826
}
827827

828+
// Exercises the `:has` pseudo selector in three forms against the direct `div`
// children of `#container`: inside `:not(...)` in a selector string, through
// the `has("p")` method, and through `not(":has(p)")`.
#[test]
829+
fn test_selector_pseudo_has() -> Result {
830+
let html = r#"
831+
<!doctype html>
832+
<html>
833+
<body>
834+
<div id="container">
835+
<div class="outer"><p>1</p></div>
836+
<div class="outer">2</div>
837+
<div class="outer">3</div>
838+
<div class="outer"><div><p>4</p></div></div>
839+
</div>
840+
</body>
841+
</html>
842+
"#;
843+
let root = Vis::load(html)?;
844+
let container = root.find("#container");
845+
assert_eq!(container.length(), 1);
846+
// child divs that do not contain a `p`: expected to be the "2" and "3" divs
847+
let div_no_has_p = container.children("div:not(:has(p))");
848+
assert_eq!(div_no_has_p.length(), 2);
849+
assert_eq!(div_no_has_p.text(), "23");
850+
// all direct child divs of the container
851+
let divs = container.children("div");
852+
// divs containing a `p`: expected to be "1" and "4" (in "4" the `p` is nested one level deeper)
853+
let div_has_p = divs.has("p");
854+
assert_eq!(div_has_p.length(), 2);
855+
assert_eq!(div_has_p.text(), "14");
856+
// same exclusion as the first check, expressed with the `not` method
857+
let div_no_has_p = divs.not(":has(p)");
858+
assert_eq!(div_no_has_p.length(), 2);
859+
assert_eq!(div_no_has_p.text(), "23");
860+
Ok(())
861+
}
862+
828863
#[test]
829864
fn test_wrong_selector_splitter() -> Result {
830865
let root = Vis::load("<b>anything</b>")?;

0 commit comments

Comments
 (0)