@@ -193,8 +193,46 @@ pub type DefaultAtom = Atom<EmptyStaticAtomSet>;
193193
194194/// Represents a string that has been interned.
195195///
196- /// In reality this contains a complex packed datastructure and the methods to extract information
197- /// from it, along with type information to tell the compiler which static set it corresponds to.
196+ /// While the type definition for `Atom` indicates that it is generic on a particular
197+ /// implementation of an atom set, you don't need to worry about this. Atoms can be static
198+ /// and come from a `StaticAtomSet` generated by the `string_cache_codegen` crate, or they
199+ /// can be dynamic and created by you on an `EmptyStaticAtomSet`.
200+ ///
201+ /// `Atom` implements `Clone` but not `Copy`, since internally atoms are reference-counted;
202+ /// this means that you may need to `.clone()` an atom to keep copies of it in different
203+ /// places, or when passing it to a function that takes an `Atom` rather than an `&Atom`.
204+ ///
205+ /// ## Creating an atom at runtime
206+ ///
207+ /// If you use `string_cache_codegen` to generate a precomputed list of atoms, your code
208+ /// may then do something like read data from somewhere and extract tokens that need to be
209+ /// compared to the atoms. In this case, you can use `Atom::from(&str)` or
210+ /// `Atom::from(String)`. These create a reference-counted atom which will be
211+ /// automatically freed when all references to it are dropped.
212+ ///
213+ /// This means that your application can safely have a loop which tokenizes data, creates
214+ /// atoms from the tokens, and compares the atoms to a predefined set of keywords, without
215+ /// running the risk of arbitrary memory consumption from creating large numbers of atoms —
216+ /// as long as your application does not store clones of the atoms it creates along the
217+ /// way.
218+ ///
219+ /// For example, the following is safe and will not consume arbitrary amounts of memory:
220+ ///
221+ /// ```ignore
222+ /// let untrusted_data = "large amounts of text ...";
223+ ///
224+ /// for token in untrusted_data.split_whitespace() {
225+ /// let atom = Atom::from(token); // interns the string
226+ ///
227+ /// if atom == Atom::from("keyword") {
228+ /// // handle that keyword
229+ /// } else if atom == Atom::from("another_keyword") {
230+ /// // handle that keyword
231+ /// } else {
232+ /// println!("unknown keyword");
233+ /// }
234+ /// } // atom is dropped here, so it is not kept around in memory
235+ /// ```
198236pub struct Atom < Static : StaticAtomSet > {
199237 /// This field is public so that the `atom!()` macros can use it.
200238 /// You should not otherwise access this field.
0 commit comments