Skip to content

Commit 7c9fd8d

Browse files
author
Richard Dodd
committed
Assorted documentation
1 parent e37aed4 commit 7c9fd8d

File tree

3 files changed

+71
-6
lines changed

3 files changed

+71
-6
lines changed

markup5ever/lib.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,20 @@ extern crate string_cache;
1313
extern crate phf;
1414
pub extern crate tendril;
1515

16+
/// Create a [`SmallCharSet`], with each space-separated number stored in the set.
17+
///
18+
/// # Examples
19+
///
20+
/// ```
21+
/// # #[macro_use] extern crate markup5ever;
22+
/// # fn main() {
23+
/// let set = small_char_set!(12 54 42);
24+
/// assert_eq!(set.bits,
25+
/// 0b00000000_01000000_00000100_00000000_00000000_00000000_00010000_00000000);
26+
/// # }
27+
/// ```
28+
///
29+
/// [`SmallCharSet`]: struct.SmallCharSet.html
1630
#[macro_export]
1731
macro_rules! small_char_set ( ($($e:expr)+) => (
1832
$ crate ::SmallCharSet {

markup5ever/util/buffer_queue.rs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,17 @@
77
// option. This file may not be copied, modified, or distributed
88
// except according to those terms.
99

10+
//! The [`BufferQueue`] struct and helper types.
11+
//!
12+
//! This type is designed for the efficient parsing of string data, especially where many
13+
//! significant characters are from the ASCII range 0-63. This includes, for example, important
14+
//! characters in XML/HTML parsing.
15+
//!
16+
//! Good and predictable performance is achieved by avoiding allocation where possible (a.k.a. zero
17+
//! copy).
18+
//!
19+
//! [`BufferQueue`]: struct.BufferQueue.html
20+
1021

1122
use std::collections::VecDeque;
1223

@@ -15,15 +26,24 @@ use tendril::StrTendril;
1526
pub use self::SetResult::{FromSet, NotFromSet};
1627
use util::smallcharset::SmallCharSet;
1728

18-
/// Result from `pop_except_from`.
29+
/// Result from [`pop_except_from`] containing either a character from a [`SmallCharSet`], or a
30+
/// string buffer of characters not from the set.
31+
///
32+
/// [`pop_except_from`]: struct.BufferQueue.html#method.pop_except_from
33+
/// [`SmallCharSet`]: ../struct.SmallCharSet.html
1934
#[derive(PartialEq, Eq, Debug)]
2035
pub enum SetResult {
36+
/// A character from the `SmallCharSet`.
2137
FromSet(char),
38+
/// A block of text containing no characters from the `SmallCharSet`.
2239
NotFromSet(StrTendril),
2340
}
2441

25-
/// A queue of owned string buffers, which supports incrementally
26-
/// consuming characters.
42+
/// A queue of owned string buffers, which supports incrementally consuming characters.
43+
///
44+
/// Internally it uses [`VecDeque`] and has the same complexity properties.
45+
///
46+
/// [`VecDeque`]: https://doc.rust-lang.org/std/collections/struct.VecDeque.html
2747
pub struct BufferQueue {
2848
/// Buffers to process.
2949
buffers: VecDeque<StrTendril>,

markup5ever/util/smallcharset.rs

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,54 @@
77
// option. This file may not be copied, modified, or distributed
88
// except according to those terms.
99

10+
//! This module contains a single struct [`SmallCharSet`]. See its documentation for details.
11+
//!
12+
//! [`SmallCharSet`]: struct.SmallCharSet.html
1013
1114

1215
/// Represents a set of "small characters", those with Unicode scalar
1316
/// values less than 64.
17+
///
18+
/// This is stored as a bitmap, with 1 bit for each value.
1419
pub struct SmallCharSet {
1520
pub bits: u64,
1621
}
1722

1823
impl SmallCharSet {
24+
/// Checks whether a character (u8 value below 64) is stored in the SmallCharSet.
25+
///
26+
/// # Examples
27+
///
28+
/// ```ignore
29+
/// # use markup5ever::SmallCharSet;
30+
/// let set = SmallCharSet {
31+
/// bits: 0b00000000_01000000_00000100_00000000_00000000_00000000_00010000_00000000
32+
/// };
33+
/// assert!(set.contains(54));
34+
/// assert!(set.contains(b'6')); // `b'6'` is the same as 54u8
35+
/// ```
1936
#[inline]
2037
fn contains(&self, n: u8) -> bool {
2138
0 != (self.bits & (1 << (n as usize)))
2239
}
2340

24-
/// Count the number of bytes of characters at the beginning
25-
/// of `buf` which are not in the set.
26-
/// See `tokenizer::buffer_queue::pop_except_from`.
41+
/// Count the number of bytes of characters at the beginning of `buf` which are not in the set.
42+
///
43+
/// This functionality is used in [`BufferQueue::pop_except_from`].
44+
///
45+
/// # Examples
46+
///
47+
/// ```
48+
/// # #[macro_use] extern crate markup5ever;
49+
/// # fn main() {
50+
/// let set = small_char_set!(48 49 50); // '0' '1' '2'
51+
/// // `test` is 4 bytes, 😁 is 4 bytes, then we meet a character in the set
52+
/// let test_str = "test😁01232afd";
53+
/// assert_eq!(set.nonmember_prefix_len(test_str), 8);
54+
/// # }
55+
/// ```
56+
///
57+
/// [`BufferQueue::pop_except_from`]: buffer_queue/struct.BufferQueue.html#method.pop_except_from
2758
pub fn nonmember_prefix_len(&self, buf: &str) -> u32 {
2859
let mut n = 0;
2960
for b in buf.bytes() {

0 commit comments

Comments
 (0)