Skip to content

Commit 453b89c

Browse files
authored
Document xml5ever/tokenizer/states.rs (#650)
Signed-off-by: Simon Wülker <[email protected]>
1 parent b78a68a commit 453b89c

File tree

1 file changed

+100
-5
lines changed

1 file changed

+100
-5
lines changed

xml5ever/src/tokenizer/states.rs

Lines changed: 100 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,73 +8,168 @@
88
// except according to those terms.
99

1010
//! Tokenizer states.
11-
//!
12-
//! This is public for use by the tokenizer tests. Other library
13-
//! users should not have to care about this.
14-
15-
#![allow(missing_docs)] // FIXME
1611
1712
pub use AttrValueKind::*;
1813
pub use DoctypeKind::*;
1914
pub use XmlState::*;
2015

16+
/// Specifies either the public or system identifier from a [Document Type Declaration] (DTD).
17+
///
18+
/// [Document Type Declaration]: https://en.wikipedia.org/wiki/Document_type_declaration
2119
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
2220
pub enum DoctypeKind {
21+
/// The public identifier.
2322
Public,
23+
/// The system identifier.
2424
System,
2525
}
2626

27+
/// Specifies the different states an XML tokenizer will assume during parsing.
2728
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
2829
pub enum XmlState {
30+
/// The initial state of the parser.
31+
///
32+
/// It is equivalent to the [`Data`](https://html.spec.whatwg.org/#data-state) state of the HTML parser,
33+
/// except null codepoints do not cause errors.
2934
Data,
35+
/// Indicates that the parser has found a `<` character and will try to parse a tag.
3036
TagState,
37+
/// Indicates that the parser has consumed the `/` of a closing tag, like `</foo>`.
3138
EndTagState,
39+
/// Indicates that the parser is currently parsing the name of a closing tag, like the `foo` of `</foo>`.
3240
EndTagName,
41+
/// Indicates that the parser has finished parsing the name of a closing tag and expects a `>` to follow.
3342
EndTagNameAfter,
43+
/// Indicates that the parser has started parsing a [processing instruction] (PI).
44+
///
45+
/// This state is reached after the initial `?` character has been consumed.
46+
///
47+
/// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
3448
Pi,
49+
/// Indicates that the parser is currently parsing the target of a [processing instruction].
50+
///
51+
/// For example, the target of `<?xml-stylesheet type="text/xsl" href="style.xsl"?>` is `xml-stylesheet`.
52+
///
53+
/// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
3554
PiTarget,
55+
/// Indicates that the parser has finished parsing the target of a [processing instruction].
56+
///
57+
/// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
3658
PiTargetAfter,
59+
/// Indicates that the parser is currently parsing the data of a [processing instruction].
60+
///
61+
/// The "data" refers to everything between the target and the closing `?` character.
62+
///
63+
/// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
3764
PiData,
65+
/// Indicates that the parser has parsed the closing `?` of a [processing instruction].
66+
///
67+
/// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
3868
PiAfter,
69+
/// Indicates that the parser has parsed the initial `!` of a markup declaration.
70+
///
71+
/// Examples of such declarations include `<!ENTITY chap1 SYSTEM "chap1.xml">` or `<!-- Comment -->`.
3972
MarkupDecl,
73+
/// Indicates that the parser has parsed the start of a comment (`<!--`).
4074
CommentStart,
75+
/// Indicates that the parser has parsed the start of a comment and a `-` directly after it.
4176
CommentStartDash,
77+
/// Indicates that the parser is currently parsing the data within a comment.
4278
Comment,
79+
/// Indicates that the parser has parsed a `<` character within a comment.
4380
CommentLessThan,
81+
/// Indicates that the parser has parsed `<!` within a comment.
4482
CommentLessThanBang,
83+
/// Indicates that the parser has parsed `<!-` within a comment.
4584
CommentLessThanBangDash,
85+
/// Indicates that the parser has parsed `<!--` within a comment.
4686
CommentLessThanBangDashDash,
87+
/// Indicates that the parser has parsed two `-` characters within a comment which may or may not
88+
/// be the beginning of the comment end (`-->`).
4789
CommentEnd,
90+
/// Indicates that the parser has parsed a `-` character within a comment which may or may not
91+
/// be the beginning of the comment end (`-->`).
4892
CommentEndDash,
93+
/// Indicates that the parser has parsed `--!` within a comment which may or may not be part of the
94+
/// end of the comment. Comments in XML can be closed with `--!>`.
4995
CommentEndBang,
96+
/// Indicates that the parser has parsed the beginning of a CDATA section (`<![CDATA[`).
5097
Cdata,
98+
/// Indicates that the parser has parsed a `]` character within a CDATA section, which may be part of
99+
/// the end of the section (`]]>`).
51100
CdataBracket,
101+
/// Indicates that the parser has parsed two `]` characters within a CDATA section, which may be part of
102+
/// the end of the section (`]]>`).
52103
CdataEnd,
104+
/// Indicates that the parser is currently parsing the name of a tag, such as `foo` in `<foo>`.
53105
TagName,
106+
/// Indicates that the parser has parsed the `/` of a self-closing tag, such as `<foo/>`.
54107
TagEmpty,
108+
/// Indicates that the parser has finished parsing the name of a tag and is now expecting either attributes or
109+
/// a `>`.
55110
TagAttrNameBefore,
111+
/// Indicates that the parser is currently parsing the name of an attribute within a tag, such as
112+
/// `bar` in `<foo bar=baz>`.
56113
TagAttrName,
114+
/// Indicates that the parser has finished parsing the name of an attribute.
57115
TagAttrNameAfter,
116+
/// Indicates that the parser is about to parse the value of an attribute.
58117
TagAttrValueBefore,
118+
/// Indicates that the parser is currently parsing the value of an attribute, such as `baz` in
119+
/// `<foo bar=baz>`.
120+
///
121+
/// Includes information about how the value is quoted, because the quotes before and after the attribute
122+
/// value need to match.
59123
TagAttrValue(AttrValueKind),
124+
/// Indicates that the parser has parsed the beginning of a document type definition (`<!DOCTYPE`).
60125
Doctype,
126+
/// Indicates that the parser expects to parse the name of the document type definition next.
61127
BeforeDoctypeName,
128+
/// Indicates that the parser is currently parsing the name of a document type definition, such as
129+
/// `html` in `<!DOCTYPE html>`.
62130
DoctypeName,
131+
/// Indicates that the parser has finished parsing the name of the document type definition and now optionally
132+
/// expects either a public or a system identifier.
63133
AfterDoctypeName,
134+
/// Indicates that the parser has parsed a keyword for either a public or system identifier (`PUBLIC` or `SYSTEM`).
64135
AfterDoctypeKeyword(DoctypeKind),
136+
/// Indicates that the parser is about to parse the value of a public or system identifier within
137+
/// a document type definition, such as `foo` in
138+
/// `<!DOCTYPE html PUBLIC "foo" "bar">`.
65139
BeforeDoctypeIdentifier(DoctypeKind),
140+
/// Indicates that the parser is currently parsing the value of a public or system identifier
141+
/// that is surrounded by double quotes, such as `foo` in
142+
/// `<!DOCTYPE html PUBLIC "foo" "bar">`.
66143
DoctypeIdentifierDoubleQuoted(DoctypeKind),
144+
/// Indicates that the parser is currently parsing the value of a public or system identifier
145+
/// that is surrounded by single quotes, such as `foo` in
146+
/// `<!DOCTYPE html PUBLIC 'foo' 'bar'>`.
67147
DoctypeIdentifierSingleQuoted(DoctypeKind),
148+
/// Indicates that the parser has finished parsing either a public or system identifier within a
149+
/// document type definition.
68150
AfterDoctypeIdentifier(DoctypeKind),
151+
/// Indicates that the parser has finished parsing a public identifier and now expects
152+
/// a system identifier.
69153
BetweenDoctypePublicAndSystemIdentifiers,
154+
/// Indicates that the parser is currently parsing an ill-formed document type definition, such as
155+
/// `<!DOCTYPE html what-is-this>`.
70156
BogusDoctype,
157+
/// Indicates that the parser is currently parsing an ill-formed comment, such as
158+
/// `<? this is not what a comment should look like! >`.
71159
BogusComment,
160+
/// Interrupts the tokenizer for one single call to `step`.
161+
///
162+
/// It is unclear whether this is still necessary ([#649](https://github.com/servo/html5ever/issues/649)).
72163
Quiescent,
73164
}
74165

166+
/// Specifies how an attribute value is quoted, if at all.
75167
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
76168
pub enum AttrValueKind {
169+
/// An attribute value that is not surrounded by quotes, like `bar` in `foo=bar`.
77170
Unquoted,
171+
/// An attribute value that is surrounded by single quotes, like `bar` in `foo='bar'`.
78172
SingleQuoted,
173+
/// An attribute value that is surrounded by double quotes, like `bar` in `foo="bar"`.
79174
DoubleQuoted,
80175
}

0 commit comments

Comments
 (0)