Skip to content

Commit ba076bf

Browse files
author
bors-servo
authored
Auto merge of #200 - derekdreery:docs_docs_docs, r=jdm
Add documentation, and the ability to generate documentation. Multiple changes - Add documentation to types & methods in atom.rs - Add ability to add documentation to generated types What I'm trying to achieve with this is 1. Someone can look at this library and know how to use it without looking at the source code. 2. Someone who sees the generated types in another library (like html5ever) can just know they are interned strings and not worry too much about the internal details of the interning. I hope it's useful :) <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/string-cache/200) <!-- Reviewable:end -->
2 parents 4b80f2d + 8b38b6f commit ba076bf

File tree

2 files changed

+76
-6
lines changed

2 files changed

+76
-6
lines changed

src/atom.rs

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,18 +138,36 @@ impl StringCache {
138138
}
139139
}
140140

141+
/// A static `PhfStrSet`
142+
///
143+
/// This trait is implemented by static sets of interned strings generated using
144+
/// `string_cache_codegen`, and `EmptyStaticAtomSet` for when strings will be added dynamically.
145+
///
146+
/// It is used by the methods of [`Atom`] to check if a string is present in the static set.
147+
///
148+
/// [`Atom`]: struct.Atom.html
141149
pub trait StaticAtomSet {
150+
/// Get the location of the static string set in the binary.
142151
fn get() -> &'static PhfStrSet;
152+
/// Get the index of the empty string, which is in every set and is used for `Atom::default`.
143153
fn empty_string_index() -> u32;
144154
}
145155

156+
/// A string set created using a [perfect hash function], specifically
157+
/// [Hash, Displace and Compress].
158+
///
159+
/// See the [Hash, Displace and Compress] (CHD) paper for the meaning of the struct fields.
160+
///
161+
/// [perfect hash function]: https://en.wikipedia.org/wiki/Perfect_hash_function
162+
/// [Hash, Displace and Compress]: http://cmph.sourceforge.net/papers/esa09.pdf
146163
pub struct PhfStrSet {
147164
pub key: u64,
148165
pub disps: &'static [(u32, u32)],
149166
pub atoms: &'static [&'static str],
150167
pub hashes: &'static [u32],
151168
}
152169

170+
/// An empty static atom set for when only dynamic strings will be added
153171
pub struct EmptyStaticAtomSet;
154172

155173
impl StaticAtomSet for EmptyStaticAtomSet {
@@ -174,6 +192,10 @@ impl StaticAtomSet for EmptyStaticAtomSet {
174192
/// Use this if you don’t care about static atoms.
175193
pub type DefaultAtom = Atom<EmptyStaticAtomSet>;
176194

195+
/// Represents a string that has been interned.
196+
///
197+
/// In reality this contains a complex packed data structure and the methods to extract information
198+
/// from it, along with type information to tell the compiler which static set it corresponds to.
177199
pub struct Atom<Static: StaticAtomSet> {
178200
/// This field is public so that the `atom!()` macros can use it.
179201
/// You should not otherwise access this field.
@@ -207,6 +229,7 @@ impl<Static: StaticAtomSet> Atom<Static> {
207229
UnpackedAtom::from_packed(self.unsafe_data)
208230
}
209231

232+
/// Get the hash of the string as it is stored in the set.
210233
pub fn get_hash(&self) -> u32 {
211234
match unsafe { self.unpack() } {
212235
Static(index) => {
@@ -441,11 +464,7 @@ impl<Static: StaticAtomSet> Atom<Static> {
441464
let mut buffer: [u8; 64] = unsafe { mem::uninitialized() };
442465
if let Some(buffer_prefix) = buffer.get_mut(..s.len()) {
443466
buffer_prefix.copy_from_slice(s.as_bytes());
444-
// FIXME: use from std::str when stable https://github.com/rust-lang/rust/issues/41119
445-
pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
446-
mem::transmute(v)
447-
}
448-
let as_str = unsafe { from_utf8_unchecked_mut(buffer_prefix) };
467+
let as_str = unsafe { ::std::str::from_utf8_unchecked_mut(buffer_prefix) };
449468
f(as_str);
450469
Atom::from(&*as_str)
451470
} else {
@@ -455,6 +474,9 @@ impl<Static: StaticAtomSet> Atom<Static> {
455474
}
456475
}
457476

477+
/// Like [`to_ascii_uppercase`].
478+
///
479+
/// [`to_ascii_uppercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_uppercase
458480
pub fn to_ascii_uppercase(&self) -> Self {
459481
for (i, b) in self.bytes().enumerate() {
460482
if let b'a' ... b'z' = b {
@@ -464,6 +486,9 @@ impl<Static: StaticAtomSet> Atom<Static> {
464486
self.clone()
465487
}
466488

489+
/// Like [`to_ascii_lowercase`].
490+
///
491+
/// [`to_ascii_lowercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_lowercase
467492
pub fn to_ascii_lowercase(&self) -> Self {
468493
for (i, b) in self.bytes().enumerate() {
469494
if let b'A' ... b'Z' = b {
@@ -473,10 +498,16 @@ impl<Static: StaticAtomSet> Atom<Static> {
473498
self.clone()
474499
}
475500

501+
/// Like [`eq_ignore_ascii_case`].
502+
///
503+
/// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case
476504
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
477505
(self == other) || self.eq_str_ignore_ascii_case(&**other)
478506
}
479507

508+
/// Like [`eq_ignore_ascii_case`], but takes an unhashed string as `other`.
509+
///
510+
/// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case
480511
pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool {
481512
(&**self).eq_ignore_ascii_case(other)
482513
}
@@ -525,6 +556,8 @@ fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] {
525556
}
526557

527558
impl UnpackedAtom {
559+
/// Pack a key, fitting it into a u64 with flags and data. See `string_cache_shared` for
560+
/// hints for the layout.
528561
#[inline(always)]
529562
unsafe fn pack(self) -> u64 {
530563
match self {
@@ -546,6 +579,7 @@ impl UnpackedAtom {
546579
}
547580
}
548581

582+
/// Unpack a key, extracting information from a single u64 into useable structs.
549583
#[inline(always)]
550584
unsafe fn from_packed(data: u64) -> UnpackedAtom {
551585
debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged

string-cache-codegen/lib.rs

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ use std::path::Path;
8282
/// A builder for a static atom set and relevant macros
8383
pub struct AtomType {
8484
path: String,
85+
atom_doc: Option<String>,
86+
static_set_doc: Option<String>,
8587
macro_name: String,
8688
macro_doc: Option<String>,
8789
atoms: HashSet<String>,
@@ -108,16 +110,40 @@ impl AtomType {
108110
/// macro_rules! foo_atom {
109111
/// // Expands to: $crate::foo::FooAtom { … }
110112
/// }
113+
/// ```
111114
pub fn new(path: &str, macro_name: &str) -> Self {
112-
assert!(macro_name.ends_with("!"));
115+
assert!(macro_name.ends_with("!"), "`macro_name` must end with '!'");
113116
AtomType {
114117
path: path.to_owned(),
115118
macro_name: macro_name[..macro_name.len() - "!".len()].to_owned(),
119+
atom_doc: None,
120+
static_set_doc: None,
116121
macro_doc: None,
117122
atoms: HashSet::new(),
118123
}
119124
}
120125

126+
/// Add some documentation to the generated Atom type alias.
127+
///
128+
/// This can help the user know that the type uses interned strings.
129+
///
130+
/// Note that `docs` should not contain the `///` at the front of normal docs.
131+
pub fn with_atom_doc(&mut self, docs: &str) -> &mut Self {
132+
self.atom_doc = Some(docs.to_owned());
133+
self
134+
}
135+
136+
/// Add some documentation to the generated static set.
137+
///
138+
/// This can help the user know that this type is zero-sized and just references a static
139+
/// lookup table, or point them to the `Atom` type alias for more info.
140+
///
141+
/// Note that `docs` should not contain the `///` at the front of normal docs.
142+
pub fn with_static_set_doc(&mut self, docs: &str) -> &mut Self {
143+
self.static_set_doc = Some(docs.to_owned());
144+
self
145+
}
146+
121147
/// Add some documentation to the generated macro.
122148
///
123149
/// Note that `docs` should not contain the `///` at the front of normal docs.
@@ -176,6 +202,14 @@ impl AtomType {
176202
} else {
177203
&self.path
178204
};
205+
let atom_doc = match self.atom_doc {
206+
Some(ref doc) => quote!(#[doc = #doc]),
207+
None => quote!()
208+
};
209+
let static_set_doc = match self.static_set_doc {
210+
Some(ref doc) => quote!(#[doc = #doc]),
211+
None => quote!()
212+
};
179213
let macro_doc = match self.macro_doc {
180214
Some(ref doc) => quote!(#[doc = #doc]),
181215
None => quote!()
@@ -186,7 +220,9 @@ impl AtomType {
186220
let path = iter::repeat(quote::Ident::from(&*self.path));
187221

188222
quote! {
223+
#atom_doc
189224
pub type #type_name = ::string_cache::Atom<#static_set_name>;
225+
#static_set_doc
190226
pub struct #static_set_name;
191227
impl ::string_cache::StaticAtomSet for #static_set_name {
192228
fn get() -> &'static ::string_cache::PhfStrSet {

0 commit comments

Comments
 (0)