From c69bffcb74a9eaf29981e5457a254c78521f8542 Mon Sep 17 00:00:00 2001 From: Glen De Cauwsemaecker Date: Wed, 16 Jul 2025 15:56:18 +0200 Subject: [PATCH 01/10] add typestate pattern chapter --- src/SUMMARY.md | 2 + .../typestate-pattern.md | 84 +++++++++++ .../typestate-pattern/typestate-generics.md | 141 ++++++++++++++++++ 3 files changed, 227 insertions(+) create mode 100644 src/idiomatic/leveraging-the-type-system/typestate-pattern.md create mode 100644 src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md diff --git a/src/SUMMARY.md b/src/SUMMARY.md index 1dca1f14df59..437c2b0bb08d 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -437,6 +437,8 @@ - [Semantic Confusion](idiomatic/leveraging-the-type-system/newtype-pattern/semantic-confusion.md) - [Parse, Don't Validate](idiomatic/leveraging-the-type-system/newtype-pattern/parse-don-t-validate.md) - [Is It Encapsulated?](idiomatic/leveraging-the-type-system/newtype-pattern/is-it-encapsulated.md) + - [Typestate Pattern](idiomatic/leveraging-the-type-system/typestate-pattern.md) + - [Typestate Pattern with Generics](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md) --- diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md new file mode 100644 index 000000000000..1753780f8404 --- /dev/null +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md @@ -0,0 +1,84 @@ +--- +minutes: 15 +--- + +## Typestate Pattern + +The typestate pattern uses Rust’s type system to make **invalid states +unrepresentable**. 
+ +```rust +# use std::fmt::Write; +#[derive(Default)] +struct Serializer { output: String } +struct SerializeStruct { ser: Serializer } + +impl Serializer { + fn serialize_struct(mut self, name: &str) -> SerializeStruct { + let _ = writeln!(&mut self.output, "{name} {{"); + SerializeStruct { ser: self } + } +} + +impl SerializeStruct { + fn serialize_field(mut self, key: &str, value: &str) -> Self { + let _ = writeln!(&mut self.ser.output, " {key}={value};"); + self + } + + fn finish_struct(mut self) -> Serializer { + self.ser.output.push_str("}\n"); + self.ser + } +} + +let ser = Serializer::default() + .serialize_struct("User") + .serialize_field("id", "42") + .serialize_field("name", "Alice") + .finish_struct(); +println!("{}", ser.output); +``` + +
+ +- This example is inspired by + [Serde's `Serializer` trait](https://docs.rs/serde/latest/serde/ser/trait.Serializer.html). + For a deeper explanation of how Serde models serialization as a state machine, + see . + +- The typestate pattern allows us to model state machines using Rust’s type + system. In this case, the state machine is a simple serializer. + +- The key idea is that each state in the process, starting a struct, writing + fields, and finishing, is represented by a different type. Transitions between + states happen by consuming one value and producing another. + +- In the example above: + + - Once we begin serializing a struct, the `Serializer` is moved into the + `SerializeStruct` state. At that point, we no longer have access to the + original `Serializer`. + + - While in the `SerializeStruct` state, we can only call methods related to + writing fields. We cannot use the same instance to serialize a tuple, list, + or primitive. Those constructors simply do not exist here. + + - Only after calling `finish_struct` do we get the `Serializer` back. At that + point, we can inspect the output or start a new serialization session. + + - If we forget to call `finish_struct` and drop the `SerializeStruct` instead, + the original `Serializer` is lost. This ensures that incomplete or invalid + output can never be observed. + +- By contrast, if all methods were defined on `Serializer` itself, nothing would + prevent users from mixing serialization modes or leaving a struct unfinished. + +- This pattern avoids such misuse by making it **impossible to represent invalid + transitions**. + +- One downside of typestate modeling is potential code duplication between + states. In the next section, we will see how to use **generics** to reduce + duplication while preserving correctness. + +
diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md new file mode 100644 index 000000000000..ecbd526a794a --- /dev/null +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md @@ -0,0 +1,141 @@ +## Typestate Pattern with Generics + +Generics can be used with the typestate pattern to reduce duplication and allow +shared logic across state variants, while still encoding state transitions in +the type system. + +```rust +# fn main() -> std::io::Result<()> { +#[non_exhaustive] +struct Insecure; +struct Secure { + client_cert: Option>, +} + +trait Transport { + /* ... */ +} +impl Transport for Insecure { + /* ... */ +} +impl Transport for Secure { + /* ... */ +} + +#[non_exhaustive] +struct WantsTransport; +struct Ready { + transport: T, +} + +struct ConnectionBuilder { + host: String, + timeout: Option, + stage: T, +} + +struct Connection {/* ... */} + +impl Connection { + fn new(host: &str) -> ConnectionBuilder { + ConnectionBuilder { + host: host.to_owned(), + timeout: None, + stage: WantsTransport, + } + } +} + +impl ConnectionBuilder { + fn timeout(mut self, secs: u64) -> Self { + self.timeout = Some(secs); + self + } +} + +impl ConnectionBuilder { + fn insecure(self) -> ConnectionBuilder> { + ConnectionBuilder { + host: self.host, + timeout: self.timeout, + stage: Ready { transport: Insecure }, + } + } + + fn secure(self) -> ConnectionBuilder> { + ConnectionBuilder { + host: self.host, + timeout: self.timeout, + stage: Ready { transport: Secure { client_cert: None } }, + } + } +} + +impl ConnectionBuilder> { + fn client_certificate(mut self, raw: Vec) -> Self { + self.stage.transport.client_cert = Some(raw); + self + } +} + +impl ConnectionBuilder> { + fn connect(self) -> std::io::Result { + // ... 
use valid state to establish the configured connection + Ok(Connection {}) + } +} + +let _conn = Connection::new("db.local") + .secure() + .client_certificate(vec![1, 2, 3]) + .timeout(10) + .connect()?; +Ok(()) +# } +``` + +
+ +- This example extends the typestate pattern using **generic parameters** to + avoid duplication of common logic. + +- We use a generic type `T` to represent the current stage of the builder, and + share fields like `host` and `timeout` across all stages. + +- The transport phase uses `insecure()` and `secure()` to transition from + `WantsTransport` into `Ready`, where `T` is a type that implements the + `Transport` trait. + +- Only once the connection is in a `Ready` state, we can call `.connect()`, + guaranteed at compile time. + +- Using generics allows us to avoid writing separate `BuilderForSecure`, + `BuilderForInsecure`, etc. structs. + + Shared behavior, like `.timeout(...)`, can be implemented once and reused + across all states. + +- This same design appears + [in real-world libraries like **Rustls**](https://docs.rs/rustls/latest/rustls/struct.ConfigBuilder.html), + where the `ConfigBuilder` uses typestate and generics to guide users through a + safe, ordered configuration flow. + + It enforces at compile time that users must choose protocol versions, a + certificate verifier, and client certificate options, in the correct sequence, + before building a config. + +- **Downsides** of this approach include: + - The documentation of the various builder types can become difficult to + follow, since their names are generated by generics and internal structs + like `Ready`. + - Error messages from the compiler may become more opaque, especially if a + trait bound is not satisfied or a state transition is incomplete. + + The error messages might also be hard to follow due to the complexity as a + result of the nested generics types. + +- Still, in return for this complexity, you get compile-time enforcement of + valid configuration, clear builder sequencing, and no possibility of + forgetting a required step or misusing the API at runtime. + +
From 2ac209c9a9226c9aec3ef2f762783d2f3dc61cc1 Mon Sep 17 00:00:00 2001 From: Glen De Cauwsemaecker Date: Wed, 16 Jul 2025 16:01:46 +0200 Subject: [PATCH 02/10] ensure to have explicit main functions in typestate pattern --- .../typestate-pattern.md | 14 ++++++++------ .../typestate-pattern/typestate-generics.md | 16 ++++++++-------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md index 1753780f8404..e30f7dac9272 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md @@ -32,12 +32,14 @@ impl SerializeStruct { } } -let ser = Serializer::default() - .serialize_struct("User") - .serialize_field("id", "42") - .serialize_field("name", "Alice") - .finish_struct(); -println!("{}", ser.output); +fn main() { + let ser = Serializer::default() + .serialize_struct("User") + .serialize_field("id", "42") + .serialize_field("name", "Alice") + .finish_struct(); + println!("{}", ser.output); +} ```
diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md index ecbd526a794a..b0e78c806f45 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md @@ -5,7 +5,6 @@ shared logic across state variants, while still encoding state transitions in the type system. ```rust -# fn main() -> std::io::Result<()> { #[non_exhaustive] struct Insecure; struct Secure { @@ -85,13 +84,14 @@ impl ConnectionBuilder> { } } -let _conn = Connection::new("db.local") - .secure() - .client_certificate(vec![1, 2, 3]) - .timeout(10) - .connect()?; -Ok(()) -# } +fn main() -> std::io::Result<()> { + let _conn = Connection::new("db.local") + .secure() + .client_certificate(vec![1, 2, 3]) + .timeout(10) + .connect()?; + Ok(()) +} ```
From 602ef859ba43399212ffc7c40f1e830f11ee6dc5 Mon Sep 17 00:00:00 2001 From: Glen De Cauwsemaecker Date: Wed, 16 Jul 2025 16:05:01 +0200 Subject: [PATCH 03/10] replace ser var name with serializer to avoid typo to trip up --- .../typestate-pattern.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md index e30f7dac9272..e10d12e5397a 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md @@ -11,34 +11,34 @@ unrepresentable**. # use std::fmt::Write; #[derive(Default)] struct Serializer { output: String } -struct SerializeStruct { ser: Serializer } +struct SerializeStruct { serializer: Serializer } impl Serializer { fn serialize_struct(mut self, name: &str) -> SerializeStruct { let _ = writeln!(&mut self.output, "{name} {{"); - SerializeStruct { ser: self } + SerializeStruct { serializer: self } } } impl SerializeStruct { fn serialize_field(mut self, key: &str, value: &str) -> Self { - let _ = writeln!(&mut self.ser.output, " {key}={value};"); + let _ = writeln!(&mut self.serializer.output, " {key}={value};"); self } fn finish_struct(mut self) -> Serializer { - self.ser.output.push_str("}\n"); - self.ser + self.serializer.output.push_str("}\n"); + self.serializer } } fn main() { - let ser = Serializer::default() + let serializer = Serializer::default() .serialize_struct("User") .serialize_field("id", "42") .serialize_field("name", "Alice") .finish_struct(); - println!("{}", ser.output); + println!("{}", serializer.output); } ``` From 4b0870eb35a401e825b7eed200168d84a7b4ab7a Mon Sep 17 00:00:00 2001 From: Glen De Cauwsemaecker Date: Sat, 2 Aug 2025 11:18:05 +0200 Subject: [PATCH 04/10] apply part of feedback --- .../typestate-pattern.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git 
a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md index e10d12e5397a..a67dadd2147c 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md @@ -4,8 +4,7 @@ minutes: 15 ## Typestate Pattern -The typestate pattern uses Rust’s type system to make **invalid states -unrepresentable**. +Typestate is the practice of encoding a part of the state of the value in its type, preventing incorrect or inapplicable operations from being called on the value. ```rust # use std::fmt::Write; @@ -52,10 +51,19 @@ fn main() { - The typestate pattern allows us to model state machines using Rust’s type system. In this case, the state machine is a simple serializer. -- The key idea is that each state in the process, starting a struct, writing - fields, and finishing, is represented by a different type. Transitions between +- The key idea is that at each state in the process, we can only + do the actions which are valid for that state. Transitions between states happen by consuming one value and producing another. 
+```bob ++------------+ serialize struct +-----------------+ +| Serializer +-------------------->| SerializeStruct |<-------+ ++------------+ +-+-----+---------+ | + ^ | | | + | finish struct | | serialize field | + +-----------------------------+ +------------------+ +``` + - In the example above: - Once we begin serializing a struct, the `Serializer` is moved into the From 14cc136c3e6fe16de06db455fd8e5f59ff61080b Mon Sep 17 00:00:00 2001 From: Glen De Cauwsemaecker Date: Sat, 2 Aug 2025 11:46:36 +0200 Subject: [PATCH 05/10] rework the initial typestate no-generic content --- src/SUMMARY.md | 1 + .../typestate-pattern.md | 114 +++++++++--------- .../typestate-pattern/typestate-example.md | 98 +++++++++++++++ 3 files changed, 153 insertions(+), 60 deletions(-) create mode 100644 src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md diff --git a/src/SUMMARY.md b/src/SUMMARY.md index 437c2b0bb08d..fce36275a981 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -438,6 +438,7 @@ - [Parse, Don't Validate](idiomatic/leveraging-the-type-system/newtype-pattern/parse-don-t-validate.md) - [Is It Encapsulated?](idiomatic/leveraging-the-type-system/newtype-pattern/is-it-encapsulated.md) - [Typestate Pattern](idiomatic/leveraging-the-type-system/typestate-pattern.md) + - [Typestate Pattern Example](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md) - [Typestate Pattern with Generics](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md) --- diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md index a67dadd2147c..627101c85086 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md @@ -1,94 +1,88 @@ --- -minutes: 15 +minutes: 30 --- -## Typestate Pattern +## Typestate Pattern: Problem -Typestate is the practice of encoding a part of 
the state of the value in its type, preventing incorrect or inapplicable operations from being called on the value. +How can we ensure that only valid operations are allowed on a value based on its +current state? + +```rust,editable +use std::fmt::Write as _; -```rust -# use std::fmt::Write; #[derive(Default)] -struct Serializer { output: String } -struct SerializeStruct { serializer: Serializer } +struct Serializer { + output: String, +} impl Serializer { - fn serialize_struct(mut self, name: &str) -> SerializeStruct { + fn serialize_struct_start(&mut self, name: &str) { let _ = writeln!(&mut self.output, "{name} {{"); - SerializeStruct { serializer: self } } -} -impl SerializeStruct { - fn serialize_field(mut self, key: &str, value: &str) -> Self { - let _ = writeln!(&mut self.serializer.output, " {key}={value};"); - self + fn serialize_struct_field(&mut self, key: &str, value: &str) { + let _ = writeln!(&mut self.output, " {key}={value};"); + } + + fn serialize_struct_end(&mut self) { + self.output.push_str("}\n"); } - fn finish_struct(mut self) -> Serializer { - self.serializer.output.push_str("}\n"); - self.serializer + fn finish(self) -> String { + self.output } } fn main() { - let serializer = Serializer::default() - .serialize_struct("User") - .serialize_field("id", "42") - .serialize_field("name", "Alice") - .finish_struct(); - println!("{}", serializer.output); + let mut serializer = Serializer::default(); + serializer.serialize_struct_start("User"); + serializer.serialize_struct_field("id", "42"); + serializer.serialize_struct_field("name", "Alice"); + + // serializer.serialize_struct_end(); // ← Oops! Forgotten + + println!("{}", serializer.finish()); } ```
-- This example is inspired by - [Serde's `Serializer` trait](https://docs.rs/serde/latest/serde/ser/trait.Serializer.html). - For a deeper explanation of how Serde models serialization as a state machine, - see . - -- The typestate pattern allows us to model state machines using Rust’s type - system. In this case, the state machine is a simple serializer. - -- The key idea is that at each state in the process, we can only - do the actions which are valid for that state. Transitions between - states happen by consuming one value and producing another. +- This `Serializer` is meant to write a structured value. The expected usage + follows this sequence: ```bob -+------------+ serialize struct +-----------------+ -| Serializer +-------------------->| SerializeStruct |<-------+ -+------------+ +-+-----+---------+ | - ^ | | | - | finish struct | | serialize field | - +-----------------------------+ +------------------+ +serialize struct start +-+--------------------- + | + +--> serialize struct field + -+--------------------- + | + +--> serialize struct field + -+--------------------- + | + +--> serialize struct end ``` -- In the example above: - - - Once we begin serializing a struct, the `Serializer` is moved into the - `SerializeStruct` state. At that point, we no longer have access to the - original `Serializer`. +- However, in this example we forgot to call `serialize_struct_end()` before + `finish()`. As a result, the serialized output is incomplete or syntactically + incorrect. - - While in the `SerializeStruct` state, we can only call methods related to - writing fields. We cannot use the same instance to serialize a tuple, list, - or primitive. Those constructors simply do not exist here. +- One approach to fix this would be to track internal state manually, and return + a `Result` from methods like `serialize_struct_field()` or `finish()` if the + current state is invalid. - - Only after calling `finish_struct` do we get the `Serializer` back. 
At that - point, we can inspect the output or start a new serialization session. +- But this has downsides: - - If we forget to call `finish_struct` and drop the `SerializeStruct` instead, - the original `Serializer` is lost. This ensures that incomplete or invalid - output can never be observed. + - It is easy to get wrong as an implementer. Rust’s type system cannot help + enforce the correctness of our state transitions. -- By contrast, if all methods were defined on `Serializer` itself, nothing would - prevent users from mixing serialization modes or leaving a struct unfinished. + - It also adds unnecessary burden on the user, who must handle `Result` values + for operations that are misused in source code rather than at runtime. -- This pattern avoids such misuse by making it **impossible to represent invalid - transitions**. +- A better solution is to model the valid state transitions directly in the type + system. -- One downside of typestate modeling is potential code duplication between - states. In the next section, we will see how to use **generics** to reduce - duplication while preserving correctness. + In the next slide, we will apply the **typestate pattern** to enforce correct + usage at compile time and make invalid states unrepresentable.
diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md new file mode 100644 index 000000000000..bde350525e9e --- /dev/null +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md @@ -0,0 +1,98 @@ +## Typestate Pattern: Example + +The typestate pattern encodes part of a value’s runtime state into its type. +This allows us to prevent invalid or inapplicable operations at compile time. + +```rust,editable +use std::fmt::Write as _; + +#[derive(Default)] +struct Serializer { + output: String, +} + +struct SerializeStruct { + serializer: Serializer, +} + +impl Serializer { + fn serialize_struct(mut self, name: &str) -> SerializeStruct { + let _ = writeln!(&mut self.output, "{name} {{"); + SerializeStruct { serializer: self } + } + + fn finish(self) -> String { + self.output + } +} + +impl SerializeStruct { + fn serialize_field(mut self, key: &str, value: &str) -> Self { + let _ = writeln!(&mut self.serializer.output, " {key}={value};"); + self + } + + fn finish_struct(mut self) -> Serializer { + self.serializer.output.push_str("}\n"); + self.serializer + } +} + +fn main() { + let serializer = Serializer::default() + .serialize_struct("User") + .serialize_field("id", "42") + .serialize_field("name", "Alice") + .finish_struct(); + + println!("{}", serializer.finish()); +} +``` + +
<details> + +- This example is inspired by Serde’s + [`Serializer` trait](https://docs.rs/serde/latest/serde/ser/trait.Serializer.html). + Serde uses typestates internally to ensure serialization follows a valid + structure. For more, see: <https://serde.rs/impl-serializer.html> + +- The key idea behind typestate is that state transitions happen by consuming a + value and producing a new one. At each step, only operations valid for that + state are available. + +```bob ++------------+ serialize struct +-----------------+ +| Serializer +-------------------->| SerializeStruct |<-------+ ++--+---------+ +-+-----+---------+ | + | ^ | | | + | | finish struct | | serialize field | + | +-----------------------------+ +------------------+ + | + +---> finish +``` + +- In this example: + + - We begin with a `Serializer`, which only allows us to start serializing a + struct. + + - Once we call `.serialize_struct(...)`, ownership moves into a + `SerializeStruct` value. From that point on, we can only call methods + related to serializing struct fields. + + - The original `Serializer` is no longer accessible — preventing us from + mixing modes (like writing a tuple or primitive mid-struct) or calling + `finish()` too early. + + - Only after calling `.finish_struct()` do we receive the `Serializer` back. + At that point, the output can be finalized or reused. + +- If we forget to call `finish_struct()` and drop the `SerializeStruct` early, + the `Serializer` is also dropped. This ensures incomplete output cannot leak + into the system. + +- By contrast, if we had implemented everything on `Serializer` directly — as + seen on the previous slide — nothing would stop someone from skipping important + steps or mixing serialization flows. + +</details>
From b61c3378c5ac7ba3a12557fc366c0932797e2b6a Mon Sep 17 00:00:00 2001 From: Glen De Cauwsemaecker Date: Sun, 3 Aug 2025 11:25:18 +0200 Subject: [PATCH 06/10] write new draft of typestate advanced intro this is again in the flow of a problem statement first, building on our original example, and in next slide we'll add the solution with generics --- src/SUMMARY.md | 1 + .../typestate-pattern/typestate-advanced.md | 96 +++++++++++++ .../typestate-pattern/typestate-generics.md | 136 +----------------- 3 files changed, 101 insertions(+), 132 deletions(-) create mode 100644 src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md diff --git a/src/SUMMARY.md b/src/SUMMARY.md index fce36275a981..e09226bd216c 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -439,6 +439,7 @@ - [Is It Encapsulated?](idiomatic/leveraging-the-type-system/newtype-pattern/is-it-encapsulated.md) - [Typestate Pattern](idiomatic/leveraging-the-type-system/typestate-pattern.md) - [Typestate Pattern Example](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md) + - [Beyond Simple Typestate](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md) - [Typestate Pattern with Generics](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md) --- diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md new file mode 100644 index 000000000000..f16521e43a94 --- /dev/null +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md @@ -0,0 +1,96 @@ +## Beyond Simple Typestate + +How do we manage increasingly complex configuration flows with many possible +states and transitions, while still preventing incompatible operations? + +```rust +struct Serializer {/* [...] */} +struct SerializeStruct {/* [...] */} +struct SerializeStructProperty {/* [...] 
*/} +struct SerializeList {/* [...] */} + +impl Serializer { + // TODO, implement: + // + // fn serialize_struct(self, name: &str) -> SerializeStruct + // fn finish(self) -> String +} + +impl SerializeStruct { + // TODO, implement: + // + // fn serialize_property(mut self, name: &str) -> SerializeStructProperty + + // TODO, + // How should we finish this struct? This depends on where it appears: + // - At the root level: return `Serializer` + // - As a property inside another struct: return `SerializeStruct` + // - As a value inside a list: return `SerializeList` + // + // fn finish(self) -> ??? +} + +impl SerializeStructProperty { + // TODO, implement: + // + // fn serialize_string(self, value: &str) -> SerializeStruct + // fn serialize_struct(self, name: &str) -> SerializeStruct + // fn serialize_list(self) -> SerializeList + // fn finish(self) -> SerializeStruct +} + +impl SerializeList { + // TODO, implement: + // + // fn serialize_string(mut self, value: &str) -> Self + // fn serialize_struct(mut self, value: &str) -> SerializeStruct + // fn serialize_list(mut self) -> SerializeList + + // TODO: + // Like `SerializeStruct::finish`, the return type depends on nesting. + // + // fn finish(mut self) -> ??? +} +``` + +
<details> + +- Building on our previous serializer, we now want to support **nested + structures** and **lists**. + +- However, this introduces both **duplication** and **structural complexity**. + + `SerializeStructProperty` and `SerializeList` now share similar logic (e.g. + adding strings, nested structs, or nested lists). + +- Even more critically, we now hit a **type system limitation**: we cannot + cleanly express what `finish()` should return without duplicating variants for + every nesting context (e.g. root, struct, list). + +- To better understand this limitation, let’s map the valid transitions: + +```bob + +-----------+ +---------+------------+-----+ + | | | | | | + V | V | V | + + | +serializer --> structure --> property --> list +-+ + + | ^ | + V | | + +-----------+ + String +``` + +- From this diagram, we can observe: + - The transitions are recursive + - The return types depend on _where_ a substructure or list appears + - Each context requires a return path to its parent + +- With only concrete types, this becomes unmanageable. Our current approach + leads to an explosion of types and manual wiring. + +- In the next slide, we’ll see how **generics** let us model recursive flows + with less boilerplate, while still enforcing valid operations at compile time. + +</details>
diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md index b0e78c806f45..1ad01edc1c46 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md @@ -1,141 +1,13 @@ ## Typestate Pattern with Generics -Generics can be used with the typestate pattern to reduce duplication and allow -shared logic across state variants, while still encoding state transitions in -the type system. +TODO -```rust -#[non_exhaustive] -struct Insecure; -struct Secure { - client_cert: Option>, -} - -trait Transport { - /* ... */ -} -impl Transport for Insecure { - /* ... */ -} -impl Transport for Secure { - /* ... */ -} - -#[non_exhaustive] -struct WantsTransport; -struct Ready { - transport: T, -} - -struct ConnectionBuilder { - host: String, - timeout: Option, - stage: T, -} - -struct Connection {/* ... */} - -impl Connection { - fn new(host: &str) -> ConnectionBuilder { - ConnectionBuilder { - host: host.to_owned(), - timeout: None, - stage: WantsTransport, - } - } -} - -impl ConnectionBuilder { - fn timeout(mut self, secs: u64) -> Self { - self.timeout = Some(secs); - self - } -} - -impl ConnectionBuilder { - fn insecure(self) -> ConnectionBuilder> { - ConnectionBuilder { - host: self.host, - timeout: self.timeout, - stage: Ready { transport: Insecure }, - } - } - - fn secure(self) -> ConnectionBuilder> { - ConnectionBuilder { - host: self.host, - timeout: self.timeout, - stage: Ready { transport: Secure { client_cert: None } }, - } - } -} - -impl ConnectionBuilder> { - fn client_certificate(mut self, raw: Vec) -> Self { - self.stage.transport.client_cert = Some(raw); - self - } -} - -impl ConnectionBuilder> { - fn connect(self) -> std::io::Result { - // ... 
use valid state to establish the configured connection - Ok(Connection {}) - } -} - -fn main() -> std::io::Result<()> { - let _conn = Connection::new("db.local") - .secure() - .client_certificate(vec![1, 2, 3]) - .timeout(10) - .connect()?; - Ok(()) -} +```rust,editable +// TODO ```
-- This example extends the typestate pattern using **generic parameters** to - avoid duplication of common logic. - -- We use a generic type `T` to represent the current stage of the builder, and - share fields like `host` and `timeout` across all stages. - -- The transport phase uses `insecure()` and `secure()` to transition from - `WantsTransport` into `Ready`, where `T` is a type that implements the - `Transport` trait. - -- Only once the connection is in a `Ready` state, we can call `.connect()`, - guaranteed at compile time. - -- Using generics allows us to avoid writing separate `BuilderForSecure`, - `BuilderForInsecure`, etc. structs. - - Shared behavior, like `.timeout(...)`, can be implemented once and reused - across all states. - -- This same design appears - [in real-world libraries like **Rustls**](https://docs.rs/rustls/latest/rustls/struct.ConfigBuilder.html), - where the `ConfigBuilder` uses typestate and generics to guide users through a - safe, ordered configuration flow. - - It enforces at compile time that users must choose protocol versions, a - certificate verifier, and client certificate options, in the correct sequence, - before building a config. - -- **Downsides** of this approach include: - - The documentation of the various builder types can become difficult to - follow, since their names are generated by generics and internal structs - like `Ready`. - - Error messages from the compiler may become more opaque, especially if a - trait bound is not satisfied or a state transition is incomplete. - - The error messages might also be hard to follow due to the complexity as a - result of the nested generics types. - -- Still, in return for this complexity, you get compile-time enforcement of - valid configuration, clear builder sequencing, and no possibility of - forgetting a required step or misusing the API at runtime. +- TODO
From 11481c74e4b66eeae4c1fdeb0862a3db8be0a16b Mon Sep 17 00:00:00 2001 From: Glen De Cauwsemaecker Date: Sun, 3 Aug 2025 20:11:21 +0200 Subject: [PATCH 07/10] add first new draft for generic typestate --- .../typestate-pattern/typestate-advanced.md | 9 +- .../typestate-pattern/typestate-generics.md | 263 +++++++++++++++++- 2 files changed, 264 insertions(+), 8 deletions(-) diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md index f16521e43a94..0f95b5b53aeb 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md @@ -76,10 +76,11 @@ impl SerializeList { + | serializer --> structure --> property --> list +-+ - | ^ | - V | | - +-----------+ - String + | | ^ | ^ + V | | | | + | +-----------+ | + String | | + +--------------------------+ ``` - From this diagram, we can observe: diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md index 1ad01edc1c46..57af8c1db11c 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md @@ -1,13 +1,268 @@ ## Typestate Pattern with Generics -TODO +By combining typestate modeling with generics, we can express a wider range of +valid states and transitions without duplicating logic. This approach is +especially useful when the number of states grows or when multiple states share +behavior but differ in structure. -```rust,editable -// TODO +```rust +# use std::fmt::Write as _; +# +struct Serializer { + // [...] 
+ # indent: usize, + # buffer: String, + # state: S, +} + +struct Root; +struct Struct(S); +struct List(S); +struct Property(S); + +impl Serializer { + fn new() -> Self { + // [...] + # Self { + # indent: 0, + # buffer: String::new(), + # state: Root, + # } + } + + fn serialize_struct(mut self, name: &str) -> Serializer> { + // [...] + # writeln!(self.buffer, "{name} {{").unwrap(); + # Serializer { + # indent: self.indent + 1, + # buffer: self.buffer, + # state: Struct(self.state), + # } + } + + fn finish(self) -> String { + // [...] + # self.buffer + } +} + +impl Serializer { + fn buffer_size(&self) -> usize { + // [...] + # self.buffer.len() + } +} + +impl Serializer> { + fn serialize_property(mut self, name: &str) -> Serializer>> { + // [...] + # write!(self.buffer, "{}{name}: ", " ".repeat(self.indent * 2)).unwrap(); + # Serializer { + # indent: self.indent, + # buffer: self.buffer, + # state: Property(self.state), + # } + } + + fn finish_struct(mut self) -> Serializer { + // [...] + # self.indent -= 1; + # writeln!(self.buffer, "{}}}", " ".repeat(self.indent * 2)).unwrap(); + # Serializer { + # indent: self.indent, + # buffer: self.buffer, + # state: self.state.0, + # } + } +} + +impl Serializer>> { + fn serialize_struct(mut self, name: &str) -> Serializer>> { + // [...] + # writeln!(self.buffer, "{name} {{").unwrap(); + # Serializer { + # indent: self.indent + 1, + # buffer: self.buffer, + # state: Struct(self.state.0), + # } + } + + fn serialize_list(mut self) -> Serializer>> { + // [...] + # writeln!(self.buffer, "[").unwrap(); + # Serializer { + # indent: self.indent + 1, + # buffer: self.buffer, + # state: List(self.state.0), + # } + } + + fn serialize_string(mut self, value: &str) -> Serializer> { + // [...] + # writeln!(self.buffer, "{value},").unwrap(); + # Serializer { + # indent: self.indent, + # buffer: self.buffer, + # state: self.state.0, + # } + } +} + +impl Serializer> { + fn serialize_struct(mut self, name: &str) -> Serializer>> { + // [...] 
+ # writeln!(self.buffer, "{}{name} {{", " ".repeat(self.indent * 2)).unwrap(); + # Serializer { + # indent: self.indent + 1, + # buffer: self.buffer, + # state: Struct(self.state), + # } + } + + fn serialize_string(mut self, value: &str) -> Self { + // [...] + # writeln!(self.buffer, "{}{value},", " ".repeat(self.indent * 2)).unwrap(); + # self + } + + fn finish_list(mut self) -> Serializer { + // [...] + # self.indent -= 1; + # writeln!(self.buffer, "{}]", " ".repeat(self.indent * 2)).unwrap(); + # Serializer { + # indent: self.indent, + # buffer: self.buffer, + # state: self.state.0, + # } + } +} + +fn main() { + # #[rustfmt::skip] + let serializer = Serializer::new() + .serialize_struct("Foo") + .serialize_property("bar") + .serialize_struct("Bar") + .serialize_property("baz") + .serialize_list() + .serialize_string("abc") + .serialize_struct("Baz") + .serialize_property("partial") + .serialize_string("def") + .serialize_property("empty") + .serialize_struct("Empty") + .finish_struct() + .finish_struct() + .finish_list() + .finish_struct() + .finish_struct(); + + # let buffer_size = serializer.buffer_size(); + let output = serializer.finish(); + + # println!("buffer size = {buffer_size}\n---"); + println!("{output}"); + + // These will all fail at compile time: + + // Serializer::new().serialize_list(); + // Serializer::new().serialize_string("foo"); + // Serializer::new().serialize_struct("Foo").serialize_string("bar"); + // Serializer::new().serialize_struct("Foo").serialize_list(); + // Serializer::new().serialize_property("foo"); +} ```
-- TODO +- The full code for this example is available + [in the playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=48b106089ca600453f3ed00a0a31af26) + +- By using generics to track the parent context, we can construct arbitrarily + nested serializers that enforce valid transitions between struct, list, and + property states. + +- This lets us build a recursive structure while preserving control over what + methods are accessible in each state. + +- Here's how the flow maps to a state machine: + +```bob + +-----------+ +---------+------------+-----+ + | | | | | | + V | V | V | + + | +serializer --> structure --> property --> list +-+ + + | | ^ | ^ + V | | | | + | +-----------+ | + String | | + +--------------------------+ +``` + +- And this is reflected directly in the types of our serializer: + +```bob + +------+ + finish | | + serialize struct V | + struct ++---------------------+ --------------> +-----------------------------+ <---------------+ +| Serializer [ Root ] | | Serializer [ Struct [ S ] ] | | ++---------------------+ <-------------- +-----------------------------+ <-----------+ | + finish struct | | + | | serialize | | | + | +----------+ property V serialize | | + | | string or | | +finish | | +-------------------------------+ struct | | + V | | Serializer [ Property [ S ] ] | ------------+ | + finish | +-------------------------------+ | + +--------+ struct | | + | String | | serialize | | + +--------+ | list V | + | finish | + | +---------------------------+ list | + +------> | Serializer [ List [ S ] ] | ----------------+ + +---------------------------+ + serialize + list or string ^ + | or finish list | + +-------------------+ +``` + +- Of course, this pattern isn't a silver bullet. 
It still allows issues like: + - Empty or invalid property names (which can be fixed using + [the newtype pattern](../newtype-pattern.md)) + - Duplicate property names (which could be tracked in `Struct` or handled + via `Result`) + +- If validation failures occur, we can also change method signatures to return a + `Result`, allowing recovery: + + ```rust,compile_fail + struct PropertySerializeError { + kind: PropertyError, + serializer: Serializer>, + } + + impl Serializer> { + fn serialize_property( + self, + name: &str, + ) -> Result>>, PropertySerializeError> { + /* ... */ + } + } + ``` + +- While this API is powerful, it’s not always ergonomic. Production serializers + typically favor simpler APIs and reserve the typestate pattern for enforcing + critical invariants. + +- One excellent real-world example is + [`rustls::ClientConfig`](https://docs.rs/rustls/latest/rustls/client/struct.ClientConfig.html#method.builder), + which uses typestate with generics to guide the user through safe and correct + configuration steps.
From 6b041937356522e770fd3cfc0399ed0a1e65f95b Mon Sep 17 00:00:00 2001 From: Glen De Cauwsemaecker Date: Sun, 3 Aug 2025 20:32:50 +0200 Subject: [PATCH 08/10] improve typestate pattern slides --- .../leveraging-the-type-system/typestate-pattern.md | 3 ++- .../typestate-pattern/typestate-advanced.md | 3 --- .../typestate-pattern/typestate-example.md | 11 ++++++----- .../typestate-pattern/typestate-generics.md | 4 ++-- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md index 627101c85086..bfe864528a5b 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md @@ -83,6 +83,7 @@ serialize struct start system. In the next slide, we will apply the **typestate pattern** to enforce correct - usage at compile time and make invalid states unrepresentable. + usage at compile time and make it impossible to call incompatible methods or + forget to do a required action.
diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md index 0f95b5b53aeb..fd10ef5e163f 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md @@ -60,9 +60,6 @@ impl SerializeList { - However, this introduces both **duplication** and **structural complexity**. - `SerializeStructProperty` and `SerializeList` now share similar logic (e.g. - adding strings, nested structs, or nested lists). - - Even more critically, we now hit a **type system limitation**: we cannot cleanly express what `finish()` should return without duplicating variants for every nesting context (e.g. root, struct, list). diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md index bde350525e9e..9a22e3ab83fb 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md @@ -17,7 +17,7 @@ struct SerializeStruct { impl Serializer { fn serialize_struct(mut self, name: &str) -> SerializeStruct { - let _ = writeln!(&mut self.output, "{name} {{"); + writeln!(&mut self.output, "{name} {{").unwrap(); SerializeStruct { serializer: self } } @@ -28,7 +28,7 @@ impl Serializer { impl SerializeStruct { fn serialize_field(mut self, key: &str, value: &str) -> Self { - let _ = writeln!(&mut self.serializer.output, " {key}={value};"); + writeln!(&mut self.serializer.output, " {key}={value};").unwrap(); self } @@ -62,8 +62,9 @@ fn main() { ```bob +------------+ serialize struct +-----------------+ -| Serializer +-------------------->| SerializeStruct |<-------+ -+--+---------+ +-+-----+---------+ | +| Serializer | ------------------> | 
SerializeStruct | <------+ ++------------+ +-----------------+ | + | | ^ | | | | | finish struct | | serialize field | | +-----------------------------+ +------------------+ @@ -81,7 +82,7 @@ fn main() { related to serializing struct fields. - The original `Serializer` is no longer accessible — preventing us from - mixing modes (like writing a tuple or primitive mid-struct) or calling + mixing modes (such as starting another _struct_ mid-struct) or calling `finish()` too early. - Only after calling `.finish_struct()` do we receive the `Serializer` back. diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md index 57af8c1db11c..9a83957802dd 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md @@ -176,7 +176,7 @@ fn main() {
- The full code for this example is available - [in the playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=48b106089ca600453f3ed00a0a31af26) + [in the playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=48b106089ca600453f3ed00a0a31af26). - By using generics to track the parent context, we can construct arbitrarily nested serializers that enforce valid transitions between struct, list, and @@ -234,7 +234,7 @@ finish | | +-------------------------------+ struct - Of course, this pattern isn't a silver bullet. It still allows issues like: - Empty or invalid property names (which can be fixed using [the newtype pattern](../newtype-pattern.md)) - - Duplicate property names (which could be tracked in `Struct` or handled + - Duplicate property names (which could be tracked in `Struct` and handled via `Result`) - If validation failures occur, we can also change method signatures to return a From e431a47fed45748ebca575f69cd53fa9c2e040a0 Mon Sep 17 00:00:00 2001 From: Glen De Cauwsemaecker Date: Sat, 30 Aug 2025 14:00:14 +0200 Subject: [PATCH 09/10] address some of @randomPoison's feedback --- .../typestate-pattern.md | 16 +------------- .../typestate-pattern/typestate-example.md | 22 ++++++++++--------- .../typestate-pattern/typestate-generics.md | 16 ++++++-------- 3 files changed, 20 insertions(+), 34 deletions(-) diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md index bfe864528a5b..0e20d9127cc5 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern.md @@ -47,21 +47,7 @@ fn main() {
-- This `Serializer` is meant to write a structured value. The expected usage - follows this sequence: - -```bob -serialize struct start --+--------------------- - | - +--> serialize struct field - -+--------------------- - | - +--> serialize struct field - -+--------------------- - | - +--> serialize struct end -``` +- This `Serializer` is meant to write a structured value. - However, in this example we forgot to call `serialize_struct_end()` before `finish()`. As a result, the serialized output is incomplete or syntactically diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md index 9a22e3ab83fb..b0c252f12f8c 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md @@ -49,16 +49,7 @@ fn main() { } ``` -
- -- This example is inspired by Serde’s - [`Serializer` trait](https://docs.rs/serde/latest/serde/ser/trait.Serializer.html). - Serde uses typestates internally to ensure serialization follows a valid - structure. For more, see: - -- The key idea behind typestate is that state transitions happen by consuming a - value and producing a new one. At each step, only operations valid for that - state are available. +`Serializer` usage flowchart: ```bob +------------+ serialize struct +-----------------+ @@ -72,6 +63,17 @@ fn main() { +---> finish ``` +
+ +- This example is inspired by Serde’s + [`Serializer` trait](https://docs.rs/serde/latest/serde/ser/trait.Serializer.html). + Serde uses typestates internally to ensure serialization follows a valid + structure. For more, see: + +- The key idea behind typestate is that state transitions happen by consuming a + value and producing a new one. At each step, only operations valid for that + state are available. + - In this example: - We begin with a `Serializer`, which only allows us to start serializing a diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md index 9a83957802dd..d27cdb432c67 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md @@ -46,13 +46,6 @@ impl Serializer { } } -impl Serializer { - fn buffer_size(&self) -> usize { - // [...] - # self.buffer.len() - } -} - impl Serializer> { fn serialize_property(mut self, name: &str) -> Serializer>> { // [...] @@ -157,10 +150,8 @@ fn main() { .finish_struct() .finish_struct(); - # let buffer_size = serializer.buffer_size(); let output = serializer.finish(); - # println!("buffer size = {buffer_size}\n---"); println!("{output}"); // These will all fail at compile time: @@ -185,6 +176,13 @@ fn main() { - This lets us build a recursive structure while preserving control over what methods are accessible in each state. +- Methods common to all states can be implemented for any `S` in + `Serializer`. + +- These marker types (e.g., `List`) incur no memory or runtime overhead, as + they hold no data other than a possible Zero-Sized Type. Their sole purpose is + to enforce correct API usage by leveraging the type system. 
+ - Here's how the flow maps to a state machine: ```bob From dea835b1c58719856d30575cbd80d4ae716b0bba Mon Sep 17 00:00:00 2001 From: Glen De Cauwsemaecker Date: Sat, 30 Aug 2025 15:46:58 +0200 Subject: [PATCH 10/10] apply remaining feedback of @randomPoison --- src/SUMMARY.md | 4 + .../typestate-pattern/typestate-advanced.md | 26 +- .../typestate-pattern/typestate-generics.md | 252 ++---------------- .../typestate-pattern/typestate-generics.rs | 164 ++++++++++++ .../typestate-generics/complete.md | 89 +++++++ .../typestate-generics/property.md | 49 ++++ .../typestate-generics/root.md | 40 +++ .../typestate-generics/struct.md | 43 +++ 8 files changed, 421 insertions(+), 246 deletions(-) create mode 100644 src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.rs create mode 100644 src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/complete.md create mode 100644 src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/property.md create mode 100644 src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/root.md create mode 100644 src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/struct.md diff --git a/src/SUMMARY.md b/src/SUMMARY.md index e09226bd216c..fc0d724d3d49 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -441,6 +441,10 @@ - [Typestate Pattern Example](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-example.md) - [Beyond Simple Typestate](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md) - [Typestate Pattern with Generics](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md) + - [Serializer: implement Root](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/root.md) + - [Serializer: implement Struct](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/struct.md) + - [Serializer: implement 
Property](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/property.md) + - [Serializer: Complete implementation](idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/complete.md) --- diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md index fd10ef5e163f..805d22a50a95 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-advanced.md @@ -53,18 +53,7 @@ impl SerializeList { } ``` -
- -- Building on our previous serializer, we now want to support **nested - structures** and **lists**. - -- However, this introduces both **duplication** and **structural complexity**. - -- Even more critically, we now hit a **type system limitation**: we cannot - cleanly express what `finish()` should return without duplicating variants for - every nesting context (e.g. root, struct, list). - -- To better understand this limitation, let’s map the valid transitions: +Diagram of valid transitions: ```bob +-----------+ +---------+------------+-----+ @@ -80,7 +69,18 @@ serializer --> structure --> property --> list +-+ +--------------------------+ ``` -- From this diagram, we can observe: +
+ +- Building on our previous serializer, we now want to support **nested + structures** and **lists**. + +- However, this introduces both **duplication** and **structural complexity**. + +- Even more critically, we now hit a **type system limitation**: we cannot + cleanly express what `finish()` should return without duplicating variants for + every nesting context (e.g. root, struct, list). + +- From the diagram of valid transitions, we can observe: - The transitions are recursive - The return types depend on _where_ a substructure or list appears - Each context requires a return path to its parent diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md index d27cdb432c67..401550b5cc85 100644 --- a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.md @@ -6,184 +6,19 @@ especially useful when the number of states grows or when multiple states share behavior but differ in structure. ```rust -# use std::fmt::Write as _; -# -struct Serializer { - // [...] - # indent: usize, - # buffer: String, - # state: S, -} +{{#include typestate-generics.rs:Serializer-def}} -struct Root; -struct Struct(S); -struct List(S); -struct Property(S); - -impl Serializer { - fn new() -> Self { - // [...] - # Self { - # indent: 0, - # buffer: String::new(), - # state: Root, - # } - } - - fn serialize_struct(mut self, name: &str) -> Serializer> { - // [...] - # writeln!(self.buffer, "{name} {{").unwrap(); - # Serializer { - # indent: self.indent + 1, - # buffer: self.buffer, - # state: Struct(self.state), - # } - } - - fn finish(self) -> String { - // [...] - # self.buffer - } -} - -impl Serializer> { - fn serialize_property(mut self, name: &str) -> Serializer>> { - // [...] 
- # write!(self.buffer, "{}{name}: ", " ".repeat(self.indent * 2)).unwrap(); - # Serializer { - # indent: self.indent, - # buffer: self.buffer, - # state: Property(self.state), - # } - } - - fn finish_struct(mut self) -> Serializer { - // [...] - # self.indent -= 1; - # writeln!(self.buffer, "{}}}", " ".repeat(self.indent * 2)).unwrap(); - # Serializer { - # indent: self.indent, - # buffer: self.buffer, - # state: self.state.0, - # } - } -} - -impl Serializer>> { - fn serialize_struct(mut self, name: &str) -> Serializer>> { - // [...] - # writeln!(self.buffer, "{name} {{").unwrap(); - # Serializer { - # indent: self.indent + 1, - # buffer: self.buffer, - # state: Struct(self.state.0), - # } - } - - fn serialize_list(mut self) -> Serializer>> { - // [...] - # writeln!(self.buffer, "[").unwrap(); - # Serializer { - # indent: self.indent + 1, - # buffer: self.buffer, - # state: List(self.state.0), - # } - } - - fn serialize_string(mut self, value: &str) -> Serializer> { - // [...] - # writeln!(self.buffer, "{value},").unwrap(); - # Serializer { - # indent: self.indent, - # buffer: self.buffer, - # state: self.state.0, - # } - } -} - -impl Serializer> { - fn serialize_struct(mut self, name: &str) -> Serializer>> { - // [...] - # writeln!(self.buffer, "{}{name} {{", " ".repeat(self.indent * 2)).unwrap(); - # Serializer { - # indent: self.indent + 1, - # buffer: self.buffer, - # state: Struct(self.state), - # } - } - - fn serialize_string(mut self, value: &str) -> Self { - // [...] - # writeln!(self.buffer, "{}{value},", " ".repeat(self.indent * 2)).unwrap(); - # self - } - - fn finish_list(mut self) -> Serializer { - // [...] 
- # self.indent -= 1; - # writeln!(self.buffer, "{}]", " ".repeat(self.indent * 2)).unwrap(); - # Serializer { - # indent: self.indent, - # buffer: self.buffer, - # state: self.state.0, - # } - } -} - -fn main() { - # #[rustfmt::skip] - let serializer = Serializer::new() - .serialize_struct("Foo") - .serialize_property("bar") - .serialize_struct("Bar") - .serialize_property("baz") - .serialize_list() - .serialize_string("abc") - .serialize_struct("Baz") - .serialize_property("partial") - .serialize_string("def") - .serialize_property("empty") - .serialize_struct("Empty") - .finish_struct() - .finish_struct() - .finish_list() - .finish_struct() - .finish_struct(); - - let output = serializer.finish(); - - println!("{output}"); - - // These will all fail at compile time: - - // Serializer::new().serialize_list(); - // Serializer::new().serialize_string("foo"); - // Serializer::new().serialize_struct("Foo").serialize_string("bar"); - // Serializer::new().serialize_struct("Foo").serialize_list(); - // Serializer::new().serialize_property("foo"); -} +{{#include typestate-generics.rs:Root-def}} +{{#include typestate-generics.rs:Struct-def}} +{{#include typestate-generics.rs:Property-def}} +{{#include typestate-generics.rs:List-def}} ``` -
- -- The full code for this example is available - [in the playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=48b106089ca600453f3ed00a0a31af26). - -- By using generics to track the parent context, we can construct arbitrarily - nested serializers that enforce valid transitions between struct, list, and - property states. - -- This lets us build a recursive structure while preserving control over what - methods are accessible in each state. - -- Methods common to all states can be implemented for any `S` in - `Serializer`. +We now have all the tools needed to implement the methods for the `Serializer` +and its state type definitions. This ensures that our API only permits valid +transitions, as illustrated in the following diagram: -- These marker types (e.g., `List`) incur no memory or runtime overhead, as - they hold no data other than a possible Zero-Sized Type. Their sole purpose is - to enforce correct API usage by leveraging the type system. - -- Here's how the flow maps to a state machine: +Diagram of valid transitions: ```bob +-----------+ +---------+------------+-----+ @@ -199,68 +34,19 @@ serializer --> structure --> property --> list +-+ +--------------------------+ ``` -- And this is reflected directly in the types of our serializer: - -```bob - +------+ - finish | | - serialize struct V | - struct -+---------------------+ --------------> +-----------------------------+ <---------------+ -| Serializer [ Root ] | | Serializer [ Struct [ S ] ] | | -+---------------------+ <-------------- +-----------------------------+ <-----------+ | - finish struct | | - | | serialize | | | - | +----------+ property V serialize | | - | | string or | | -finish | | +-------------------------------+ struct | | - V | | Serializer [ Property [ S ] ] | ------------+ | - finish | +-------------------------------+ | - +--------+ struct | | - | String | | serialize | | - +--------+ | list V | - | finish | - | +---------------------------+ list | - 
+------> | Serializer [ List [ S ] ] | ----------------+ - +---------------------------+ - serialize - list or string ^ - | or finish list | - +-------------------+ -``` - -- Of course, this pattern isn't a silver bullet. It still allows issues like: - - Empty or invalid property names (which can be fixed using - [the newtype pattern](../newtype-pattern.md)) - - Duplicate property names (which could be tracked in `Struct` and handled - via `Result`) - -- If validation failures occur, we can also change method signatures to return a - `Result`, allowing recovery: +
- ```rust,compile_fail - struct PropertySerializeError { - kind: PropertyError, - serializer: Serializer>, - } +- By leveraging generics to track the parent context, we can construct + arbitrarily nested serializers that enforce valid transitions between struct, + list, and property states. - impl Serializer> { - fn serialize_property( - self, - name: &str, - ) -> Result>>, PropertySerializeError> { - /* ... */ - } - } - ``` +- This enables us to build a recursive structure while maintaining strict + control over which methods are accessible in each state. -- While this API is powerful, it’s not always ergonomic. Production serializers - typically favor simpler APIs and reserve the typestate pattern for enforcing - critical invariants. +- Methods common to all states can be defined for any `S` in `Serializer<S>`. -- One excellent real-world example is - [`rustls::ClientConfig`](https://docs.rs/rustls/latest/rustls/client/struct.ClientConfig.html#method.builder), - which uses typestate with generics to guide the user through safe and correct - configuration steps. +- Marker types (e.g., `List<S>`) introduce no memory or runtime overhead, as + they contain no data other than a possible Zero-Sized Type. Their only role is + to enforce correct API usage through the type system.
// Patch header that shares this source line with the Rust file (kept verbatim):
// diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.rs b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.rs
// new file mode 100644
// index 000000000000..25587c35ef66
// --- /dev/null
// +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics.rs
// @@ -0,0 +1,164 @@

// ANCHOR: Complete
use std::fmt::Write as _;

// ANCHOR: Serializer-def
/// Text serializer whose current position in the output is encoded in the
/// type parameter `S`, so each state only exposes the methods valid for it.
struct Serializer<S> {
    // [...]
    /// Current nesting depth; every level is rendered as two spaces.
    indent: usize,
    /// Output accumulated so far.
    buffer: String,
    /// Marker value for the current state (it wraps the parent state).
    state: S,
}
// ANCHOR_END: Serializer-def

// ANCHOR: Root-def
/// State marker: top level, nothing is open.
struct Root;
// ANCHOR_END: Root-def

// ANCHOR: Struct-def
/// State marker: inside a struct whose parent state is `S`.
struct Struct<S>(S);
// ANCHOR_END: Struct-def

// ANCHOR: List-def
/// State marker: inside a list whose parent state is `S`.
struct List<S>(S);
// ANCHOR_END: List-def

// ANCHOR: Property-def
/// State marker: a property name was written and its value is pending;
/// `S` is the state to return to once the value is complete.
struct Property<S>(S);
// ANCHOR_END: Property-def

// ANCHOR: Root-impl
impl Serializer<Root> {
    /// Creates an empty serializer at nesting depth zero.
    fn new() -> Self {
        // [...]
        Self { indent: 0, buffer: String::new(), state: Root }
    }

    /// Opens a top-level struct called `name` and enters the `Struct` state.
    fn serialize_struct(mut self, name: &str) -> Serializer<Struct<Root>> {
        // [...]
        writeln!(self.buffer, "{name} {{").unwrap();
        Serializer {
            indent: self.indent + 1,
            buffer: self.buffer,
            state: Struct(self.state),
        }
    }

    /// Consumes the serializer and returns the accumulated output.
    fn finish(self) -> String {
        // [...]
        self.buffer
    }
}
// ANCHOR_END: Root-impl

// ANCHOR: Struct-impl
impl<S> Serializer<Struct<S>> {
    /// Writes the indented `name: ` prefix and enters the `Property` state,
    /// in which exactly one value has to be supplied.
    fn serialize_property(mut self, name: &str) -> Serializer<Property<Struct<S>>> {
        // [...]
        write!(self.buffer, "{}{name}: ", " ".repeat(self.indent * 2)).unwrap();
        Serializer {
            indent: self.indent,
            buffer: self.buffer,
            state: Property(self.state),
        }
    }

    /// Closes the current struct and returns to the parent state `S`.
    fn finish_struct(mut self) -> Serializer<S> {
        // [...]
        // The closing brace is aligned with the line that opened the struct.
        self.indent -= 1;
        writeln!(self.buffer, "{}}}", " ".repeat(self.indent * 2)).unwrap();
        Serializer { indent: self.indent, buffer: self.buffer, state: self.state.0 }
    }
}
// ANCHOR_END: Struct-impl

// ANCHOR: Property-impl
impl<S> Serializer<Property<Struct<S>>> {
    /// Uses a nested struct as the property value. The `Property` marker is
    /// dropped, so finishing the nested struct lands in the enclosing struct.
    fn serialize_struct(mut self, name: &str) -> Serializer<Struct<Struct<S>>> {
        // [...]
        // No indentation here: the property prefix is already on this line.
        writeln!(self.buffer, "{name} {{").unwrap();
        Serializer {
            indent: self.indent + 1,
            buffer: self.buffer,
            state: Struct(self.state.0),
        }
    }

    /// Uses a list as the property value; finishing the list lands in the
    /// enclosing struct.
    fn serialize_list(mut self) -> Serializer<List<Struct<S>>> {
        // [...]
        writeln!(self.buffer, "[").unwrap();
        Serializer {
            indent: self.indent + 1,
            buffer: self.buffer,
            state: List(self.state.0),
        }
    }

    /// Uses a plain string as the property value and returns to the
    /// enclosing struct.
    fn serialize_string(mut self, value: &str) -> Serializer<Struct<S>> {
        // [...]
        writeln!(self.buffer, "{value},").unwrap();
        Serializer { indent: self.indent, buffer: self.buffer, state: self.state.0 }
    }
}
// ANCHOR_END: Property-impl

// ANCHOR: List-impl
impl<S> Serializer<List<S>> {
    /// Opens a struct as the next list element.
    fn serialize_struct(mut self, name: &str) -> Serializer<Struct<List<S>>> {
        // [...]
        writeln!(self.buffer, "{}{name} {{", " ".repeat(self.indent * 2)).unwrap();
        Serializer {
            indent: self.indent + 1,
            buffer: self.buffer,
            state: Struct(self.state),
        }
    }

    /// Appends a string element; the serializer stays in the list state.
    fn serialize_string(mut self, value: &str) -> Self {
        // [...]
        writeln!(self.buffer, "{}{value},", " ".repeat(self.indent * 2)).unwrap();
        self
    }

    /// Closes the current list and returns to the parent state `S`.
    fn finish_list(mut self) -> Serializer<S> {
        // [...]
        self.indent -= 1;
        writeln!(self.buffer, "{}]", " ".repeat(self.indent * 2)).unwrap();
        Serializer { indent: self.indent, buffer: self.buffer, state: self.state.0 }
    }
}
// ANCHOR_END: List-impl

// ANCHOR: main
fn main() {
    #[rustfmt::skip]
    let serializer = Serializer::new()
        .serialize_struct("Foo")
            .serialize_property("bar")
            .serialize_struct("Bar")
                .serialize_property("baz")
                .serialize_list()
                    .serialize_string("abc")
                    .serialize_struct("Baz")
                        .serialize_property("partial")
                        .serialize_string("def")
                        .serialize_property("empty")
                        .serialize_struct("Empty")
                        .finish_struct()
                    .finish_struct()
                .finish_list()
            .finish_struct()
        .finish_struct();

    let output = serializer.finish();

    println!("{output}");

    // These will all fail at compile time:

    // Serializer::new().serialize_list();
    // Serializer::new().serialize_string("foo");
    // Serializer::new().serialize_struct("Foo").serialize_string("bar");
    // Serializer::new().serialize_struct("Foo").serialize_list();
    // Serializer::new().serialize_property("foo");
}
// ANCHOR_END: main

// Patch text for the next file that shares this source line (kept verbatim;
// diagram whitespace was already collapsed and is not reconstructed):
// diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/complete.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/complete.md
// new file mode 100644
// index 000000000000..066a7cc111ae
// --- /dev/null
// +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/complete.md
// @@ -0,0 +1,89 @@
// +## Serializer: complete implementation
// +
// +Looking back at our original desired flow:
// +
// +```bob + +-----------+ +---------+------------+-----+ + | | | | | | + V | V | V | + + | +serializer --> structure --> property --> list +-+ + + | | ^ | ^ + V | | | | + | +-----------+ | + String | | + +--------------------------+ +```
// +
// +We can now see this reflected directly in the types of our serializer:
// +
// +```bob + +------+ + finish | | + serialize struct V | + struct ++---------------------+ -------------->
+-----------------------------+ <---------------+ +| Serializer [ Root ] | | Serializer [ Struct [ S ] ] | | ++---------------------+ <-------------- +-----------------------------+ <-----------+ | + finish struct | | + | | serialize | | | + | +----------+ property V serialize | | + | | string or | | +finish | | +-------------------------------+ struct | | + V | | Serializer [ Property [ S ] ] | ------------+ | + finish | +-------------------------------+ | + +--------+ struct | | + | String | | serialize | | + +--------+ | list V | + | finish | + | +---------------------------+ list | + +------> | Serializer [ List [ S ] ] | ----------------+ + +---------------------------+ + serialize + list or string ^ + | or finish list | + +-------------------+ +``` + +The code for the full implementation of the `Serializer` and all its states can +be found in +[this Rust playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=c9cbb831cd05fe9db4ce42713c83ca16). + +
+ +- This pattern isn't a silver bullet. It still allows issues like: + - Empty or invalid property names (which can be fixed using + [the newtype pattern](../../newtype-pattern.md)) + - Duplicate property names (which could be tracked in `Struct<S>` and handled + via `Result`) + +- If validation failures occur, we can also change method signatures to return a + `Result`, allowing recovery: + + ```rust,compile_fail + struct PropertySerializeError<S> { + kind: PropertyError, + serializer: Serializer<Struct<S>>, + } + + impl<S> Serializer<Struct<S>> { + fn serialize_property( + self, + name: &str, + ) -> Result<Serializer<Property<Struct<S>>>, PropertySerializeError<S>> { + /* ... */ + } + } + ``` + +- While this API is powerful, it’s not always ergonomic. Production serializers + typically favor simpler APIs and reserve the typestate pattern for enforcing + critical invariants. + +- One excellent real-world example is + [`rustls::ClientConfig`](https://docs.rs/rustls/latest/rustls/client/struct.ClientConfig.html#method.builder), + which uses typestate with generics to guide the user through safe and correct + configuration steps. + +
diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/property.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/property.md new file mode 100644 index 000000000000..31da577a719a --- /dev/null +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/property.md @@ -0,0 +1,49 @@ +## Serializer: implement Property + +```rust +# use std::fmt::Write as _; +{{#include ../typestate-generics.rs:Serializer-def}} + +{{#include ../typestate-generics.rs:Struct-def}} +{{#include ../typestate-generics.rs:Property-def}} +{{#include ../typestate-generics.rs:List-def}} + +{{#include ../typestate-generics.rs:Property-impl}} +``` + +With the addition of the Property state methods, our diagram is now nearly +complete: + +```bob + +------+ + finish | | + serialize struct V | + struct ++---------------------+ --------------> +-----------------------------+ +| Serializer [ Root ] | | Serializer [ Struct [ S ] ] | ++---------------------+ <-------------- +-----------------------------+ <-----------+ + finish struct | + | serialize | | + | property V serialize | + | string or | +finish | +-------------------------------+ struct | + V | Serializer [ Property [ S ] ] | ------------+ + +-------------------------------+ + +--------+ + | String | serialize | + +--------+ list V + + +---------------------------+ + | Serializer [ List [ S ] ] | + +---------------------------+ +``` + +
+ +- A property can be defined as a `String`, `Struct<S>`, or `List<S>`, enabling + the representation of nested structures. + +- This concludes the step-by-step implementation. The full implementation, + including support for `List<S>`, is shown in the next slide. + +
diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/root.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/root.md new file mode 100644 index 000000000000..de1b90877b5a --- /dev/null +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/root.md @@ -0,0 +1,40 @@ +## Serializer: implement Root + +```rust +# use std::fmt::Write as _; +{{#include ../typestate-generics.rs:Serializer-def}} + +{{#include ../typestate-generics.rs:Root-def}} +{{#include ../typestate-generics.rs:Struct-def}} + +{{#include ../typestate-generics.rs:Root-impl}} +``` + +Referring back to our original diagram of valid transitions, we can visualize +the beginning of our implementation as follows: + +```bob + serialize + struct ++---------------------+ --------------> +--------------------------------+ +| Serializer [ Root ] | | Serializer [ Struct [ Root ] ] | ++---------------------+ <-------------- +--------------------------------+ + finish struct + | + | + | +finish | + V + + +--------+ + | String | + +--------+ +``` + +
+ +- At the "root" of our `Serializer`, the only construct allowed is a `Struct`. + +- The `Serializer` can only be finalized into a `String` from this root level. + +
diff --git a/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/struct.md b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/struct.md new file mode 100644 index 000000000000..7931c50f3e5f --- /dev/null +++ b/src/idiomatic/leveraging-the-type-system/typestate-pattern/typestate-generics/struct.md @@ -0,0 +1,43 @@ +## Serializer: implement Struct + +```rust +# use std::fmt::Write as _; +{{#include ../typestate-generics.rs:Serializer-def}} + +{{#include ../typestate-generics.rs:Struct-def}} +{{#include ../typestate-generics.rs:Property-def}} + +{{#include ../typestate-generics.rs:Struct-impl}} +``` + +The diagram can now be expanded as follows: + +```bob + +------+ + finish | | + serialize struct V | + struct ++---------------------+ --------------> +-----------------------------+ +| Serializer [ Root ] | | Serializer [ Struct [ S ] ] | ++---------------------+ <-------------- +-----------------------------+ + finish struct + | serialize | + | property V + | +finish | +------------------------------------------+ + V | Serializer [ Property [ Struct [ S ] ] ] | + +------------------------------------------+ + +--------+ + | String | + +--------+ +``` + +
+ +- A `Struct` can only contain a `Property`. + +- Finishing a `Struct` returns control to its parent. The previous slide assumed + the parent was `Root`, but it can also be something else, such as another + `Struct` in the case of nested structs. + +