Skip to content

Commit f59729e

Browse files
Akida31 authored and untitaker committed
attach spans to events
The main abstraction is the new function `Emitter::move_position`. The `CallbackEmitter` uses it to track positions and spans. To make this possible, the tokenizer has to do some more work and emit more events, so that the `Emitter` can track the position accurately. An alternative would have been to add a `Reader` parameter to all emitter methods, which `CallbackEmitter` could then use to get spans from the reader. We decided on the current approach because it introduces fewer breaking changes and less clutter for users who don't want to consume spans.
1 parent 6a1e92f commit f59729e

23 files changed

+1011
-418
lines changed

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ markup5ever_rcdom = "0.5.0-unofficial"
2626
# required for examples/scraper.rs
2727
scraper = "0.21.0"
2828
argh = "0.1.12"
29+
annotate-snippets = "0.11.5"
2930

3031
[features]
3132
# By default this crate depends on the jetscii library for best performance.
@@ -74,5 +75,8 @@ name = "callback_emitter"
7475
name = "scraper"
7576
required-features = ["tree-builder"]
7677

78+
[[example]]
79+
name = "spans"
80+
7781
[lib]
7882
bench = false

examples/callback_emitter.rs

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,27 +11,29 @@
1111
//! link: foo
1212
//! ```
1313
use html5gum::emitters::callback::{CallbackEmitter, CallbackEvent};
14-
use html5gum::{Emitter, IoReader, Tokenizer};
14+
use html5gum::{Emitter, IoReader, Span, Tokenizer};
1515

1616
fn get_emitter() -> impl Emitter<Token = String> {
1717
let mut is_anchor_tag = false;
1818
let mut is_href_attr = false;
1919

20-
CallbackEmitter::new(move |event: CallbackEvent<'_>| match event {
21-
CallbackEvent::OpenStartTag { name } => {
22-
is_anchor_tag = name == b"a";
23-
is_href_attr = false;
24-
None
25-
}
26-
CallbackEvent::AttributeName { name } => {
27-
is_href_attr = name == b"href";
28-
None
29-
}
30-
CallbackEvent::AttributeValue { value } if is_anchor_tag && is_href_attr => {
31-
Some(String::from_utf8_lossy(value).into_owned())
32-
}
33-
_ => None,
34-
})
20+
CallbackEmitter::new(
21+
move |event: CallbackEvent<'_>, _span: Span<()>| match event {
22+
CallbackEvent::OpenStartTag { name } => {
23+
is_anchor_tag = name == b"a";
24+
is_href_attr = false;
25+
None
26+
}
27+
CallbackEvent::AttributeName { name } => {
28+
is_href_attr = name == b"href";
29+
None
30+
}
31+
CallbackEvent::AttributeValue { value } if is_anchor_tag && is_href_attr => {
32+
Some(String::from_utf8_lossy(value).into_owned())
33+
}
34+
_ => None,
35+
},
36+
)
3537
}
3638

3739
fn main() {

examples/spans.rs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
//! A modified version of `examples/callback_emitter.rs` which prints the location of all links in
2+
//! the input.
3+
//!
4+
//! ```text
5+
//! printf '<h1>Hello world!</h1><a href="foo">bar</a>' | cargo run --example=spans
6+
//! ```
7+
//!
8+
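//! Output (abridged; the actual output is an annotated source snippet rendered by `annotate-snippets`):
9+
//!
10+
//! ```text
11+
//! info: found link -- found link with content `foo` here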
12+
//! ```
13+
use annotate_snippets::{Level, Renderer, Snippet};
14+
use html5gum::emitters::callback::{CallbackEmitter, CallbackEvent};
15+
use html5gum::{Emitter, IoReader, Span, Tokenizer};
16+
17+
fn get_emitter() -> impl Emitter<Token = (String, Span<usize>)> {
18+
let mut is_anchor_tag = false;
19+
let mut is_href_attr = false;
20+
21+
CallbackEmitter::new(
22+
move |event: CallbackEvent<'_>, span: Span<usize>| match event {
23+
CallbackEvent::OpenStartTag { name } => {
24+
is_anchor_tag = name == b"a";
25+
is_href_attr = false;
26+
None
27+
}
28+
CallbackEvent::AttributeName { name } => {
29+
is_href_attr = name == b"href";
30+
None
31+
}
32+
CallbackEvent::AttributeValue { value } if is_anchor_tag && is_href_attr => Some((
33+
format!(
34+
"found link with content `{}` here",
35+
String::from_utf8_lossy(value)
36+
),
37+
span,
38+
)),
39+
_ => None,
40+
},
41+
)
42+
}
43+
44+
struct CollectingReader<R> {
45+
inner: R,
46+
read: Vec<u8>,
47+
}
48+
49+
impl<R: std::io::Read> std::io::Read for CollectingReader<R> {
50+
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
51+
let len = self.inner.read(buf)?;
52+
self.read.extend_from_slice(&buf[..len]);
53+
Ok(len)
54+
}
55+
}
56+
57+
fn main() {
58+
let mut reader = CollectingReader {
59+
inner: std::io::stdin().lock(),
60+
read: Vec::new(),
61+
};
62+
let spans = Tokenizer::new_with_emitter(IoReader::new(&mut reader), get_emitter())
63+
.flatten()
64+
.collect::<Vec<_>>();
65+
let source = String::from_utf8_lossy(&reader.read);
66+
let mut message = Level::Info.title("found link");
67+
for (label, span) in &spans {
68+
message = message.snippet(
69+
Snippet::source(&source)
70+
.origin("<stdin>")
71+
.fold(true)
72+
.annotation(Level::Info.span(span.start..span.end).label(label)),
73+
);
74+
}
75+
let renderer = Renderer::styled();
76+
println!("{}", renderer.render(message));
77+
}

0 commit comments

Comments
 (0)