|
8 | 8 | // except according to those terms.
|
9 | 9 |
|
10 | 10 | //! Tokenizer states.
|
11 |
| -//! |
12 |
| -//! This is public for use by the tokenizer tests. Other library |
13 |
| -//! users should not have to care about this. |
14 |
| -
|
15 |
| -#![allow(missing_docs)] // FIXME |
16 | 11 |
|
17 | 12 | pub use AttrValueKind::*;
|
18 | 13 | pub use DoctypeKind::*;
|
19 | 14 | pub use XmlState::*;
|
20 | 15 |
|
| 16 | +/// Specifies either the public or system identifier from a [Document Type Declaration] (DTD). |
| 17 | +/// |
| 18 | +/// [Document Type Declaration]: https://en.wikipedia.org/wiki/Document_type_declaration |
21 | 19 | #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
|
22 | 20 | pub enum DoctypeKind {
|
| 21 | + /// The public identifier. |
23 | 22 | Public,
|
| 23 | + /// The system identifier. |
24 | 24 | System,
|
25 | 25 | }
|
26 | 26 |
|
| 27 | +/// Specifies the different states an XML tokenizer will assume during parsing. |
27 | 28 | #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
|
28 | 29 | pub enum XmlState {
|
| 30 | + /// The initial state of the parser. |
| 31 | + /// |
| 32 | + /// It is equivalent to the [`Data`](https://html.spec.whatwg.org/#data-state) state of the HTML parser, |
| 33 | + /// except null codepoints do not cause errors. |
29 | 34 | Data,
|
| 35 | + /// Indicates that the parser has found a `<` character and will try to parse a tag. |
30 | 36 | TagState,
|
| 37 | + /// Indicates that the parser has consumed the `/` of a closing tag, like `</foo>`. |
31 | 38 | EndTagState,
|
| 39 | + /// Indicates that the parser is currently parsing the name of a closing tag, like the `foo` of `</foo>`. |
32 | 40 | EndTagName,
|
| 41 | + /// Indicates that the parser has finished parsing the name of a closing tag and expects a `>` to follow. |
33 | 42 | EndTagNameAfter,
|
| 43 | + /// Indicates that the parser has started parsing a [processing instruction] (PI). |
| 44 | + /// |
| 45 | + /// This state is reached after the initial `?` character has been consumed. |
| 46 | + /// |
| 47 | + /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction |
34 | 48 | Pi,
|
| 49 | + /// Indicates that the parser is currently parsing the target of a [processing instruction]. |
| 50 | + /// |
| 51 | + /// For example, the target of `<?xml-stylesheet type="text/xsl" href="style.xsl"?>` is `xml-stylesheet`. |
| 52 | + /// |
| 53 | + /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction |
35 | 54 | PiTarget,
|
| 55 | + /// Indicates that the parser has finished parsing the target of a [processing instruction]. |
| 56 | + /// |
| 57 | + /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction |
36 | 58 | PiTargetAfter,
|
| 59 | + /// Indicates that the parser is currently parsing the data of a [processing instruction]. |
| 60 | + /// |
| 61 | + /// The "data" refers to everything between the target and the closing `?` character. |
| 62 | + /// |
| 63 | + /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction |
37 | 64 | PiData,
|
| 65 | + /// Indicates that the parser has parsed the closing `?` of a [processing instruction]. |
| 66 | + /// |
| 67 | + /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction |
38 | 68 | PiAfter,
|
| 69 | + /// Indicates that the parser has parsed the initial `!` of a markup declaration. |
| 70 | + /// |
| 71 | + /// Examples of such declarations include `<!ENTITY chap1 SYSTEM "chap1.xml">` or `<!-- Comment -->`. |
39 | 72 | MarkupDecl,
|
| 73 | + /// Indicates that the parser has parsed the start of a comment (`<!--`). |
40 | 74 | CommentStart,
|
| 75 | + /// Indicates that the parser has parsed the start of a comment and a `-` directly after it. |
41 | 76 | CommentStartDash,
|
| 77 | + /// Indicates that the parser is currently parsing the data within a comment. |
42 | 78 | Comment,
|
| 79 | + /// Indicates that the parser has parsed a `<` character within a comment. |
43 | 80 | CommentLessThan,
|
| 81 | + /// Indicates that the parser has parsed `<!` within a comment. |
44 | 82 | CommentLessThanBang,
|
| 83 | + /// Indicates that the parser has parsed `<!-` within a comment. |
45 | 84 | CommentLessThanBangDash,
|
| 85 | + /// Indicates that the parser has parsed `<!--` within a comment. |
46 | 86 | CommentLessThanBangDashDash,
|
| 87 | + /// Indicates that the parser has parsed two `-` characters within a comment which may or may not |
| 88 | + /// be the beginning of the comment end (`-->`). |
47 | 89 | CommentEnd,
|
| 90 | + /// Indicates that the parser has parsed a `-` character within a comment which may or may not |
| 91 | + /// be the beginning of the comment end (`-->`). |
48 | 92 | CommentEndDash,
|
| 93 | + /// Indicates that the parser has parsed `--!` within a comment which may or may not be part of the |
| 94 | + /// end of the comment. Comments in XML can be closed with `--!>`. |
49 | 95 | CommentEndBang,
|
| 96 | + /// Indicates that the parser has parsed the beginning of a CDATA section (`<![CDATA[`). |
50 | 97 | Cdata,
|
| 98 | + /// Indicates that the parser has parsed a `]` character within a CDATA section, which may be part of |
| 99 | + /// the end of the section (`]]>`). |
51 | 100 | CdataBracket,
|
| 101 | + /// Indicates that the parser has parsed two `]` characters within a CDATA section, which may be part of |
| 102 | + /// the end of the section (`]]>`). |
52 | 103 | CdataEnd,
|
| 104 | + /// Indicates that the parser is currently parsing the name of a tag, such as `foo` in `<foo>`. |
53 | 105 | TagName,
|
| 106 | + /// Indicates that the parser has parsed the `/` of a self-closing tag, such as `<foo/>`. |
54 | 107 | TagEmpty,
|
| 108 | + /// Indicates that the parser has finished parsing the name of a tag and is now expecting either attributes or |
| 109 | + /// a `>`. |
55 | 110 | TagAttrNameBefore,
|
| 111 | + /// Indicates that the parser is currently parsing the name of an attribute within a tag, such as |
| 112 | + /// `bar` in `<foo bar=baz>`. |
56 | 113 | TagAttrName,
|
| 114 | + /// Indicates that the parser has finished parsing the name of an attribute. |
57 | 115 | TagAttrNameAfter,
|
| 116 | + /// Indicates that the parser is about to parse the value of an attribute. |
58 | 117 | TagAttrValueBefore,
|
| 118 | + /// Indicates that the parser is currently parsing the value of an attribute, such as `baz` in |
| 119 | + /// `<foo bar=baz>`. |
| 120 | + /// |
| 121 | + /// Includes information about how the value is quoted, because the quotes before and after the attribute |
| 122 | + /// value need to match. |
59 | 123 | TagAttrValue(AttrValueKind),
|
| 124 | + /// Indicates that the parser has parsed the beginning of a document type definition (`<!DOCTYPE`). |
60 | 125 | Doctype,
|
| 126 | + /// Indicates that the parser expects to parse the name of the document type definition next. |
61 | 127 | BeforeDoctypeName,
|
| 128 | + /// Indicates that the parser is currently parsing the name of a document type definition, such as |
| 129 | + /// `html` in `<!DOCTYPE html>`. |
62 | 130 | DoctypeName,
|
| 131 | + /// Indicates that the parser has finished parsing the name of the document type definition and now optionally |
| 132 | + /// expects either a public or a system identifier. |
63 | 133 | AfterDoctypeName,
|
| 134 | + /// Indicates that the parser has parsed a keyword for either a public or system identifier (`PUBLIC` or `SYSTEM`). |
64 | 135 | AfterDoctypeKeyword(DoctypeKind),
|
| 136 | + /// Indicates that the parser is about to parse the value of a public or system identifier within |
| 137 | + /// a document type definition, such as `foo` in |
| 138 | + /// `<!DOCTYPE html PUBLIC "foo" "bar">`. |
65 | 139 | BeforeDoctypeIdentifier(DoctypeKind),
|
| 140 | + /// Indicates that the parser is currently parsing the value of a public or system identifier |
| 141 | + /// that is surrounded by double quotes, such as `foo` in |
| 142 | + /// `<!DOCTYPE html PUBLIC "foo" "bar">`. |
66 | 143 | DoctypeIdentifierDoubleQuoted(DoctypeKind),
|
| 144 | + /// Indicates that the parser is currently parsing the value of a public or system identifier |
| 145 | + /// that is surrounded by single quotes, such as `foo` in |
| 146 | + /// `<!DOCTYPE html PUBLIC 'foo' 'bar'>`. |
67 | 147 | DoctypeIdentifierSingleQuoted(DoctypeKind),
|
| 148 | + /// Indicates that the parser has finished parsing either a public or system identifier within a |
| 149 | + /// document type definition. |
68 | 150 | AfterDoctypeIdentifier(DoctypeKind),
|
| 151 | + /// Indicates that the parser has finished parsing a public identifier and now expects |
| 152 | + /// a system identifier. |
69 | 153 | BetweenDoctypePublicAndSystemIdentifiers,
|
| 154 | + /// Indicates that the parser is currently parsing an ill-formed document type definition, such as |
| 155 | + /// `<!DOCTYPE html what-is-this>`. |
70 | 156 | BogusDoctype,
|
| 157 | + /// Indicates that the parser is currently parsing an ill-formed comment, such as |
| 158 | + /// `<? this is not what a comment should look like! >`. |
71 | 159 | BogusComment,
|
| 160 | + /// Interrupts the tokenizer for one single call to `step`. |
| 161 | + /// |
| 162 | + /// It is unclear whether this is still necessary ([#649](https://github.com/servo/html5ever/issues/649)). |
72 | 163 | Quiescent,
|
73 | 164 | }
|
74 | 165 |
|
| 166 | +/// Specifies how an attribute value is quoted, if at all. |
75 | 167 | #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
|
76 | 168 | pub enum AttrValueKind {
|
| 169 | + /// An attribute value that is not surrounded by quotes, like `bar` in `foo=bar`. |
77 | 170 | Unquoted,
|
| 171 | + /// An attribute value that is surrounded by single quotes, like `bar` in `foo='bar'`. |
78 | 172 | SingleQuoted,
|
| 173 | + /// An attribute value that is surrounded by double quotes, like `bar` in `foo="bar"`. |
79 | 174 | DoubleQuoted,
|
80 | 175 | }
|
0 commit comments