
Commit 60841b2

todo
1 parent 6662385 commit 60841b2

9 files changed: +45 additions, -45 deletions

html5ever/Cargo.toml

Lines changed: 0 additions & 5 deletions
@@ -11,7 +11,6 @@ authors.workspace = true
 repository.workspace = true
 edition.workspace = true
 rust-version.workspace = true
-build = "build/main.rs"
 
 [features]
 trace_tokenizer = []
@@ -25,10 +24,6 @@ log = { workspace = true }
 criterion = { workspace = true }
 typed-arena = { workspace = true }
 
-[build-dependencies]
-serde = { workspace = true }
-serde_json = { workspace = true }
-
 [[bench]]
 name = "html5ever"
 harness = false

html5ever/src/tokenizer/char_ref/mod.rs

Lines changed: 0 additions & 7 deletions
@@ -57,13 +57,6 @@ pub(super) struct CharRefTokenizer {
     hex_marker: Option<char>,
 }
 
-impl CharRef {
-    const EMPTY: CharRef = CharRef {
-        chars: ['\0', '\0'],
-        num_chars: 0,
-    };
-}
-
 impl CharRefTokenizer {
     pub(super) fn new(is_consumed_in_attribute: bool) -> CharRefTokenizer {
         CharRefTokenizer {

markup5ever/Cargo.toml

Lines changed: 5 additions & 0 deletions
@@ -9,6 +9,7 @@ authors.workspace = true
 repository.workspace = true
 edition.workspace = true
 rust-version.workspace = true
+build = "build/main.rs"
 
 [lib]
 path = "lib.rs"
@@ -17,3 +18,7 @@ path = "lib.rs"
 web_atoms = { workspace = true }
 tendril = { workspace = true }
 log = { workspace = true }
+
+[build-dependencies]
+serde = { workspace = true }
+serde_json = { workspace = true }
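
A note on the build-script move (not part of the diff): the build = "build/main.rs" key points Cargo at a custom build-script path, and the [build-dependencies] table applies only when compiling that script, not the library itself. Below is a minimal sketch of the general shape of a build script that consumes a JSON data file with serde/serde_json; the file names, struct, and generated output are illustrative assumptions, not markup5ever's actual build code.

// build/main.rs -- hypothetical sketch, not the real markup5ever build script.
// serde and serde_json come from [build-dependencies] and are compiled for the
// build script only.

use std::collections::HashMap;
use std::env;
use std::fs;
use std::path::Path;

use serde::Deserialize;

#[derive(Deserialize)]
struct Entity {
    codepoints: Vec<u32>,
}

fn main() {
    // Re-run only when the input data changes (path is illustrative).
    println!("cargo:rerun-if-changed=data/entities.json");

    let json = fs::read_to_string("data/entities.json").expect("missing entity data");
    let entities: HashMap<String, Entity> =
        serde_json::from_str(&json).expect("invalid entity data");

    // Write generated Rust source into OUT_DIR for the crate to include!().
    let out_dir = env::var("OUT_DIR").unwrap();
    let mut generated = String::new();
    for (name, entity) in &entities {
        generated.push_str(&format!("// {name} -> {:?}\n", entity.codepoints));
    }
    fs::write(Path::new(&out_dir).join("named_entities.rs"), generated).unwrap();
}
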
File renamed without changes.
File renamed without changes.

markup5ever/lib.rs

Lines changed: 2 additions & 0 deletions
@@ -52,3 +52,5 @@ mod util {
 pub use interface::{Attribute, ExpandedName, QualName, TokenizerResult};
 pub use util::smallcharset::SmallCharSet;
 pub use util::*;
+
+pub mod named_entities;

html5ever/src/tokenizer/char_ref/named.rs renamed to markup5ever/named_entities/mod.rs

Lines changed: 38 additions & 31 deletions
@@ -7,22 +7,31 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-use super::codegen::{resolve_unique_hash_value, Node, DAFSA_NODES};
+mod codegen;
+
+use codegen::{resolve_unique_hash_value, Node, DAFSA_NODES};
 use super::{CharRef, Status};
-use crate::tokenizer::TokenSink;
-use crate::tokenizer::Tokenizer;
-use markup5ever::buffer_queue::BufferQueue;
-use markup5ever::tendril::StrTendril;
+use crate::buffer_queue::BufferQueue;
+use crate::tendril::StrTendril;
 
 use std::borrow::Cow;
 use std::mem;
 
+type EmitErrorFn = Fn(&str);
+
 #[derive(Clone, Debug)]
 pub(crate) struct Match {
     hash_value: usize,
     matched_text: StrTendril,
 }
 
+impl CharRef {
+    pub const EMPTY: CharRef = CharRef {
+        chars: ['\0', '\0'],
+        num_chars: 0,
+    };
+}
+
 #[derive(Clone, Debug)]
 pub(crate) struct NamedReferenceTokenizerState {
     current_node: &'static Node,
@@ -62,15 +71,15 @@ impl NamedReferenceTokenizerState {
         }
     }
 
-    fn feed_character<Sink: TokenSink>(
+    fn feed_character(
         &mut self,
         c: char,
-        tokenizer: &Tokenizer<Sink>,
+        error_callback: EmitErrorFn,
         input: &BufferQueue,
     ) -> NamedReferenceTokenizationResult {
         self.name_buffer.push_char(c);
         if !c.is_ascii_alphanumeric() && c != ';' {
-            return self.did_find_invalid_character(tokenizer, input);
+            return self.did_find_invalid_character(error_callback, input);
         }
 
         let code_point = c as u32 as u8;
@@ -85,7 +94,7 @@ impl NamedReferenceTokenizerState {
         }
 
         let Some(next_node) = next_node else {
-            return self.did_find_invalid_character(tokenizer, input);
+            return self.did_find_invalid_character(error_callback, input);
         };
 
         self.current_node = next_node;
@@ -101,24 +110,24 @@ impl NamedReferenceTokenizerState {
         NamedReferenceTokenizationResult::Continue
     }
 
-    fn did_find_invalid_character<Sink: TokenSink>(
+    fn did_find_invalid_character(
         &mut self,
-        tokenizer: &Tokenizer<Sink>,
+        error_callback: EmitErrorFn,
         input: &BufferQueue,
     ) -> NamedReferenceTokenizationResult {
         if let Some(last_match) = self.last_match.take() {
             input.push_front(self.name_buffer.clone());
             return NamedReferenceTokenizationResult::Success {
-                reference: self.finish_matching_reference(last_match, tokenizer, input),
+                reference: self.finish_matching_reference(last_match, error_callback, input),
             };
         }
 
         NamedReferenceTokenizationResult::Failed
     }
 
-    pub(crate) fn step<Sink: TokenSink>(
+    pub(crate) fn step(
         &mut self,
-        tokenizer: &Tokenizer<Sink>,
+        error_callback: EmitErrorFn,
         input: &BufferQueue,
     ) -> Result<Status, StrTendril> {
         loop {
@@ -139,18 +148,18 @@ impl NamedReferenceTokenizerState {
         }
     }
 
-    pub(crate) fn notify_end_of_file<Sink: TokenSink>(
+    pub(crate) fn notify_end_of_file(
         &mut self,
-        tokenizer: &Tokenizer<Sink>,
+        error_callback: EmitErrorFn,
         input: &BufferQueue,
     ) -> Option<CharRef> {
         input.push_front(self.name_buffer.clone());
         if let Some(last_match) = self.last_match.take() {
-            Some(self.finish_matching_reference(last_match, tokenizer, input))
+            Some(self.finish_matching_reference(last_match, error_callback, input))
         } else {
             if self.name_buffer.ends_with(';') {
                 println!("end of file and last is semicolon");
-                emit_name_error(mem::take(&mut self.name_buffer), tokenizer);
+                emit_name_error(mem::take(&mut self.name_buffer), error_callback);
             }
             None
         }
@@ -159,10 +168,10 @@ impl NamedReferenceTokenizerState {
     /// Called whenever the tokenizer has finished matching a named reference.
     ///
     /// This method takes care of emitting appropriate errors and implement some legacy quirks.
-    pub(crate) fn finish_matching_reference<Sink: TokenSink>(
+    pub(crate) fn finish_matching_reference(
         &self,
         matched: Match,
-        tokenizer: &Tokenizer<Sink>,
+        error_callback: EmitErrorFn,
         input: &BufferQueue,
     ) -> CharRef {
         let char_ref = resolve_unique_hash_value(matched.hash_value);
@@ -190,19 +199,17 @@
         // (;), then this is a missing-semicolon-after-character-reference parse
         // error.
         if last_matched_codepoint != ';' {
-            tokenizer.emit_error(Cow::Borrowed(
-                "Character reference does not end with semicolon",
-            ));
+            error_callback("Character reference does not end with semicolon");
         }
         char_ref
     }
 }
 
-pub(crate) fn emit_name_error<Sink: TokenSink>(name: StrTendril, tokenizer: &Tokenizer<Sink>) {
-    let msg = if tokenizer.opts.exact_errors {
-        Cow::from(format!("Invalid character reference &{}", name))
-    } else {
-        Cow::from("Invalid character reference")
-    };
-    tokenizer.emit_error(msg);
-}
+// pub(crate) fn emit_name_error(name: StrTendril, tokenizer: &Tokenizer<Sink>) {
+//     let msg = if tokenizer.opts.exact_errors {
+//         Cow::from(format!("Invalid character reference &{}", name))
+//     } else {
+//         Cow::from("Invalid character reference")
+//     };
+//     tokenizer.emit_error(msg);
+// }
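
One rough edge in this work-in-progress commit ("todo"): the new alias type EmitErrorFn = Fn(&str); is a bare trait-object type, which current Rust editions reject, and an unsized Fn(&str) cannot be passed by value the way the new signatures do. Below is a minimal sketch of one way the callback could be made to compile, using a generic impl Fn bound; it is illustrative only and not what the commit defines.

// Sketch only: a generic closure parameter in place of the bare `Fn` alias.
// Names mirror the diff above, but the bodies are stand-ins.

struct NamedReferenceTokenizerState;

impl NamedReferenceTokenizerState {
    // Taking `impl Fn(&str)` keeps this code free of html5ever's Tokenizer<Sink>
    // type while letting the caller decide how parse errors are reported.
    fn did_find_invalid_character(&mut self, error_callback: impl Fn(&str)) {
        error_callback("Character reference does not end with semicolon");
    }
}

fn main() {
    let mut state = NamedReferenceTokenizerState;
    // The html5ever tokenizer (or a test) supplies the error reporter.
    state.did_find_invalid_character(|msg| eprintln!("parse error: {msg}"));
}
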

web_atoms/lib.rs

Lines changed: 0 additions & 2 deletions
@@ -11,8 +11,6 @@
 // This error is coming from code generated by PHF which we cannot directly fix
 #![allow(clippy::empty_line_after_doc_comments)]
 
-use phf::Map;
-
 /// The spec replaces most characters in the ISO-2022 C1 control code range
 /// (U+0080 through U+009F) with these characters, based on Windows 8-bit
 /// codepages.
