diff --git a/docs/make.jl b/docs/make.jl index 1946e7f..4a85f93 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -13,7 +13,6 @@ api_files = map(x -> joinpath("api", x), readdir(api_dir)) tutorial_files = readdir(tutorial_dir) md_tutorial_files = [split(file, ".")[1] * ".md" for file in tutorial_files] benchmark_files = [joinpath("benchmarks", e) for e in readdir(benchmarks_dir)] -# md_benchmark_files = [split(file, ".")[1] * ".md" for file in benchmark_files] include_tutorial = true @@ -33,7 +32,7 @@ makedocs(; "Home" => "index.md", "Tutorials" => include_tutorial ? md_tutorial_files : [], "Benchmark problems list" => benchmark_files, - "API reference" => api_files, + "API reference" => "api/api.md", ], ) diff --git a/docs/src/api/0_interface.md b/docs/src/api/0_interface.md deleted file mode 100644 index 6363833..0000000 --- a/docs/src/api/0_interface.md +++ /dev/null @@ -1,19 +0,0 @@ -```@meta -CollapsedDocStrings = true -``` - -# Interface - -## Public - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Utils] -Private = false -``` - -## Private - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Utils] -Public = false -``` diff --git a/docs/src/api/api.md b/docs/src/api/api.md new file mode 100644 index 0000000..36135ca --- /dev/null +++ b/docs/src/api/api.md @@ -0,0 +1,177 @@ +# API Reference + +## Interface + +### Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.Utils] +Private = false +``` + +### Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.Utils] +Public = false +``` + +## Argmax2D + +### Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.Argmax2D] +Private = false +``` + +### Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.Argmax2D] +Public = false +``` + +## Argmax + +### Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.Argmax] +Private = false +``` + +### Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.Argmax] +Public = false +``` + +## Dynamic Vehicle Scheduling + +### Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling] +Private = false +``` + +### Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling] +Public = false +``` + +## Dynamic Assortment + +### Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment] +Private = false +``` + +### Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment] +Public = false +``` + +## Fixed-size shortest path + +### Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.FixedSizeShortestPath] +Private = false +``` + +### Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.FixedSizeShortestPath] +Public = false +``` + +## Portfolio Optimization + +### Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.PortfolioOptimization] +Private = false +``` + +### Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.PortfolioOptimization] +Public = false +``` + +## Ranking + +### Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.Ranking] +Private = false +``` + +### Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.Ranking] +Public = false +``` + +## Subset selection + +### Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.SubsetSelection] +Private = false +``` + +### Private + +```@autodocs +Modules = 
[DecisionFocusedLearningBenchmarks.SubsetSelection] +Public = false +``` + +## Stochastic Vehicle Scheduling + +### Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling] +Private = false +``` + +### Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling] +Public = false +``` + +## Warcraft + +### Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.Warcraft] +Private = false +``` + +### Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.Warcraft] +Public = false +``` diff --git a/docs/src/api/argmax.md b/docs/src/api/argmax.md deleted file mode 100644 index d3b8d29..0000000 --- a/docs/src/api/argmax.md +++ /dev/null @@ -1,19 +0,0 @@ -```@meta -CollapsedDocStrings = true -``` - -# Argmax - -## Public - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Argmax] -Private = false -``` - -## Private - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Argmax] -Public = false -``` diff --git a/docs/src/api/argmax_2d.md b/docs/src/api/argmax_2d.md deleted file mode 100644 index ce28b54..0000000 --- a/docs/src/api/argmax_2d.md +++ /dev/null @@ -1,19 +0,0 @@ -```@meta -CollapsedDocStrings = true -``` - -# Argmax2D - -## Public - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Argmax2D] -Private = false -``` - -## Private - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Argmax2D] -Public = false -``` diff --git a/docs/src/api/dvsp.md b/docs/src/api/dvsp.md deleted file mode 100644 index 2922696..0000000 --- a/docs/src/api/dvsp.md +++ /dev/null @@ -1,19 +0,0 @@ -```@meta -CollapsedDocStrings = true -``` - -# Dynamic Vehicle Scheduling - -## Public - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling] -Private = false -``` - -## Private - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling] -Public = false -``` diff --git a/docs/src/api/dynamic_assorment.md b/docs/src/api/dynamic_assorment.md deleted file mode 100644 index 847d184..0000000 --- a/docs/src/api/dynamic_assorment.md +++ /dev/null @@ -1,19 +0,0 @@ -```@meta -CollapsedDocStrings = true -``` - -# Dynamic Assortment - -## Public - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment] -Private = false -``` - -## Private - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment] -Public = false -``` diff --git a/docs/src/api/fixed_shortest_path.md b/docs/src/api/fixed_shortest_path.md deleted file mode 100644 index 36a03b2..0000000 --- a/docs/src/api/fixed_shortest_path.md +++ /dev/null @@ -1,19 +0,0 @@ -```@meta -CollapsedDocStrings = true -``` - -# Fixed-size shortest path - -## Public - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.FixedSizeShortestPath] -Private = false -``` - -## Private - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.FixedSizeShortestPath] -Public = false -``` diff --git a/docs/src/api/portfolio_optimization.md b/docs/src/api/portfolio_optimization.md deleted file mode 100644 index 6d198ac..0000000 --- a/docs/src/api/portfolio_optimization.md +++ /dev/null @@ -1,19 +0,0 @@ -```@meta -CollapsedDocStrings = true -``` - -# Subset selection - -## Public - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.PortfolioOptimization] -Private = false -``` - -## Private - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.PortfolioOptimization] -Public = false -``` diff --git a/docs/src/api/ranking.md 
b/docs/src/api/ranking.md deleted file mode 100644 index 82d0719..0000000 --- a/docs/src/api/ranking.md +++ /dev/null @@ -1,19 +0,0 @@ -```@meta -CollapsedDocStrings = true -``` - -# Ranking - -## Public - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Ranking] -Private = false -``` - -## Private - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Ranking] -Public = false -``` diff --git a/docs/src/api/subset_selection.md b/docs/src/api/subset_selection.md deleted file mode 100644 index 946eb3c..0000000 --- a/docs/src/api/subset_selection.md +++ /dev/null @@ -1,19 +0,0 @@ -```@meta -CollapsedDocStrings = true -``` - -# Subset selection - -## Public - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.SubsetSelection] -Private = false -``` - -## Private - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.SubsetSelection] -Public = false -``` diff --git a/docs/src/api/vsp.md b/docs/src/api/vsp.md deleted file mode 100644 index 119c9ba..0000000 --- a/docs/src/api/vsp.md +++ /dev/null @@ -1,19 +0,0 @@ -```@meta -CollapsedDocStrings = true -``` - -# Stochastic Vehicle Scheduling - -## Public - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling] -Private = false -``` - -## Private - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling] -Public = false -``` diff --git a/docs/src/api/warcraft.md b/docs/src/api/warcraft.md deleted file mode 100644 index c3bd480..0000000 --- a/docs/src/api/warcraft.md +++ /dev/null @@ -1,19 +0,0 @@ -```@meta -CollapsedDocStrings = true -``` - -# Warcraft - -## Public - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Warcraft] -Private = false -``` - -## Private - -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Warcraft] -Public = false -``` diff --git a/docs/src/benchmarks/dynamic_assorment.md b/docs/src/benchmarks/dynamic_assorment.md deleted file mode 100644 index dcf3243..0000000 --- a/docs/src/benchmarks/dynamic_assorment.md +++ /dev/null @@ -1,3 +0,0 @@ -# Dynamic Assortment - -[`DynamicAssortmentBenchmark`](@ref). diff --git a/docs/src/benchmarks/dynamic_assortment.md b/docs/src/benchmarks/dynamic_assortment.md new file mode 100644 index 0000000..6f5264c --- /dev/null +++ b/docs/src/benchmarks/dynamic_assortment.md @@ -0,0 +1,158 @@ +# Dynamic Assortment + +The Dynamic Assortment problem is a sequential decision-making benchmark where an agent must repeatedly select which subset of items to offer to customers over time. The goal is to maximize total revenue while accounting for dynamic customer preferences that evolve based on purchase history. + +## Problem Description + +### Overview + +In the dynamic assortment problem, a retailer has access to a catalog of ``N`` items and must decide which subset of exactly ``K`` items to offer to customers at each time step. Customers make purchasing decisions according to a choice model that depends on the following observable item features: + +- **Item prices**: Fixed monetary cost of each item +- **Item features**: Static characteristics of each item (size ``d``) +- **Hype**: Dynamic popularity that increases when items are purchased recently, and decays over time if not purchased +- **Saturation**: Dynamic measure that slightly increases when specific items are purchased + +Both hype and saturation evolve over time based on the agent's assortment decisions and customer purchases, thus providing an endogenous multistage stochastic optimization problem.
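Before the formal model, here is a minimal usage sketch. It is assembled from the constructor defaults and the calls exercised by this PR's test suite (`generate_dataset`, `generate_environments`, `generate_policies`, `evaluate_policy!`), so treat it as an illustration rather than authoritative API documentation.

```julia
using DecisionFocusedLearningBenchmarks
using Statistics: mean

# Benchmark with the default catalog size, feature dimension, assortment size, and horizon
b = DynamicAssortmentBenchmark(; N=20, d=2, K=4, max_steps=80)

# Each data sample wraps a random instance; environments simulate the customer choice process
dataset = generate_dataset(b, 10; seed=0)
environments = generate_environments(b, dataset)

# Evaluate the two built-in baseline policies on the same environments
policies = generate_policies(b)
expert, greedy = policies[1], policies[2]
r_expert, _ = evaluate_policy!(expert, environments)
r_greedy, _ = evaluate_policy!(greedy, environments)
mean(r_expert), mean(r_greedy)
```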
+ +### Mathematical Formulation + +The dynamic assortment problem can be formulated as a finite-horizon Markov Decision Process (MDP) with the following components: + +**State Space** ``\mathcal{S}``: At time step ``t``, the state ``s_t`` consists of: +```math +s_t = (p, f, h_t, \sigma_t, t, \mathcal{H}_t) +``` +where: +- ``p \in \mathbb{R}^N`` are the fixed item prices +- ``f \in \mathbb{R}^{d \times N}`` are the static item features +- ``h_t \in \mathbb{R}^N`` are the current hype levels for each item +- ``\sigma_t \in \mathbb{R}^N`` are the current saturation levels for each item +- ``t \in \{1, 2, \ldots, T\}`` is the current time step +- ``\mathcal{H}_t`` is the purchase history (last 5 purchases) + +**Action Space** ``\mathcal{A}``: The action at time ``t`` is an assortment selection: +```math +a_t \subseteq \{1, 2, \ldots, N\} \text{ such that } |a_t| = K +``` + +**Customer Choice Model**: Given assortment ``a_t``, customers choose according to a multinomial logit model: +```math +\forall i\in a_t,\, \mathbb{P}(i | a_t, s_t) = \frac{\exp(\theta_i(s_t))}{\sum_{j\in a_t} \exp(\theta_j(s_t)) + 1} +``` +```math +\mathbb{P}(\text{no purchase} | a_t, s_t) = \frac{1}{\sum_{j\in a_t} \exp(\theta_j(s_t)) + 1} +``` + +where ``\theta_i(s_t)`` is the utility of item ``i`` at state ``s_t``, computed by a hidden utility function: +```math +\theta_i(s_t) = \Phi(p_i, h_t^{(i)}, \sigma_t^{(i)}, f_{\cdot,i}) +``` + +**Transition Dynamics** ``\mathcal{P}(s_{t+1} | s_t, a_t)``: After selecting assortment ``a_t`` and observing customer choice ``i^\star \sim \mathbb{P}(\cdot | a_t, s_t)``, the state evolves as: + +1. **Hype Update**: For each item ``i``, compute a hype multiplier based on recent purchase history: + ```math + m^{(i)} = 1 + \sum_{k=1}^{\min(5, |\mathcal{H}_t|)} \mathbf{1}_{i = \mathcal{H}_t[-k]} \cdot \alpha_k + ``` + where ``\mathcal{H}_t[-k]`` is the ``k``-th most recent purchase, and the factors are: + ```math + \alpha_1 = 0.02, \quad \alpha_2 = \alpha_3 = \alpha_4 = \alpha_5 = -0.005 + ``` + Then update: ``h_{t+1}^{(i)} = h_t^{(i)} \times m^{(i)}`` + +2. **Saturation Update**: + ```math + \sigma_{t+1}^{(i)} = \begin{cases} + \sigma_t^{(i)} \times 1.01 & \text{if } i = i^\star \\ + \sigma_t^{(i)} & \text{otherwise} + \end{cases} + ``` + +3. **History Update**: ``\mathcal{H}_{t+1} = \text{append}(\mathcal{H}_t, i^\star)`` (keeping last 5 purchases) + +**Reward Function** ``r(s_t, a_t, s_{t+1})``: The immediate reward is the revenue from the customer's purchase: +```math +r(s_t, a_t, s_{t+1}) = \begin{cases} +p_{i^\star} & \text{if customer purchases item } i^\star \\ +0 & \text{if no purchase} +\end{cases} +``` + +**Objective**: Find a policy ``\pi: \mathcal{S} \to \mathcal{A}`` that maximizes the expected cumulative reward: +```math +\max_\pi \mathbb{E}\left[\sum_{t=1}^T r(s_t, \pi(s_t), s_{t+1}) \right] +``` + +**Terminal Condition**: The episode terminates after ``T`` time steps, with no terminal reward. 
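The choice model and reward above translate almost directly into code. The snippet below is a self-contained illustration of the multinomial logit probabilities and the expected immediate revenue; it mirrors the `choice_probabilities` and `compute_expected_revenue` helpers further down in this diff, but works on plain vectors instead of the package's `Environment` type (the helper names here are illustrative).

```julia
# Multinomial logit choice probabilities for an assortment a_t (0/1 vector),
# with the no-purchase option appended as a last entry of utility 0.
function logit_choice_probabilities(utilities::Vector{Float64}, assortment::BitVector)
    exp_values = [exp(utilities[i]) * assortment[i] for i in eachindex(utilities)]
    push!(exp_values, 1.0)  # exp(0) for the no-purchase option
    return exp_values ./ sum(exp_values)
end

# Expected immediate revenue r(s_t, a_t); prices[end] = 0 encodes the no-purchase option.
expected_revenue(prices, utilities, assortment) =
    sum(logit_choice_probabilities(utilities, assortment) .* prices)

utilities = [0.5, -0.2, 1.0]                 # θ_i(s_t) for N = 3 items
prices = [4.0, 7.0, 2.5, 0.0]                # item prices plus the no-purchase price
assortment = BitVector([true, false, true])  # offer items 1 and 3
expected_revenue(prices, utilities, assortment)
```

With an all-false assortment only the no-purchase option remains, so the expected revenue is 0, which is exactly what the empty-assortment test added later in this PR checks.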
+ +## Key Components + +### [`DynamicAssortmentBenchmark`](@ref) + +The main benchmark configuration with the following parameters: + +- `N`: Number of items in the catalog (default: 20) +- `d`: Dimension of static feature vectors (default: 2) +- `K`: Assortment size constraint (default: 4) +- `max_steps`: Number of time steps per episode (default: 80) +- `customer_choice_model`: linear mapping from features to utilities +- `exogenous`: Whether dynamics are exogenous (default: false) + +### Instance Generation + +Each problem instance includes: + +- **Prices**: Random values in [1, 10] for each item, plus 0 for no-purchase +- **Features**: Random static features in [1, 10] for each item +- **Initial State**: Random starting hype and saturation values in [1, 10] + +### Environment Dynamics + +The environment tracks: +- Current time step +- Purchase history (last 5 purchases) +- Current hype and saturation for each item +- Customer utilities computed from current state + +**State Observation**: Agents observe a normalized feature vector containing: +- Current full features (prices, hype, saturation, static features) +- Change in hype/saturation from previous step +- Change in hype/saturation from initial state +- Normalized current time step + +All features are divided by 10 for normalization. + +## Benchmark Policies + +### Expert Policy + +The expert policy computes the optimal assortment by brute-force enumeration: +1. Enumerate all possible K-subsets of the N items +2. For each subset, compute expected revenue using the choice model +3. Return the subset with highest expected revenue + +This provides an optimal baseline but is computationally expensive. + +### Greedy Policy + +The greedy policy selects the K items with the highest prices, ignoring dynamic effects and customer preferences. This provides a simple baseline. + +## Decision-Focused Learning Policy + +```math +\xrightarrow[\text{State}]{s_t} +\fbox{Neural network $\varphi_w$} +\xrightarrow[\text{Cost vector}]{\theta} +\fbox{Top K} +\xrightarrow[\text{Assortment}]{a_t} +``` + +**Components**: + +1. **Neural Network** ``\varphi_w``: Takes the current state ``s_t`` as input and predicts item utilities ``\theta = (\theta_1, \ldots, \theta_N)`` +2. **Optimization Layer**: Selects the top ``K`` items with highest predicted utilities to form the assortment ``a_t`` + +## Reference + +Based on the paper: [Structured Reinforcement Learning for Combinatorial Decision-Making](https://arxiv.org/abs/2505.19053) diff --git a/docs/src/tutorials/warcraft.jl b/docs/src/tutorials/warcraft_tutorial.jl similarity index 100% rename from docs/src/tutorials/warcraft.jl rename to docs/src/tutorials/warcraft_tutorial.jl diff --git a/docs/src/warcraft.md b/docs/src/warcraft.md deleted file mode 100644 index c3400e7..0000000 --- a/docs/src/warcraft.md +++ /dev/null @@ -1,155 +0,0 @@ -```@meta -EditURL = "tutorials/warcraft.jl" -``` - -# Path-finding on image maps - -In this tutorial, we showcase DecisionFocusedLearningBenchmarks.jl capabilities on one of its main benchmarks: the Warcraft benchmark. -This benchmark problem is a simple path-finding problem where the goal is to find the shortest path between the top left and bottom right corners of a given image map. -The map is represented as a 2D image representing a 12x12 grid, each cell having an unknown travel cost depending on the terrain type. 
- -First, let's load the package and create a benchmark object as follows: - -````@example warcraft -using DecisionFocusedLearningBenchmarks -b = WarcraftBenchmark() -```` - -## Dataset generation - -These benchmark objects behave as generators that can generate various needed elements in order to build an algorithm to tackle the problem. -First of all, all benchmarks are capable of generating datasets as needed, using the [`generate_dataset`](@ref) method. -This method takes as input the benchmark object for which the dataset is to be generated, and a second argument specifying the number of samples to generate: - -````@example warcraft -dataset = generate_dataset(b, 50); -nothing #hide -```` - -We obtain a vector of [`DataSample`](@ref) objects, containing all needed data for the problem. -Subdatasets can be created through regular slicing: - -````@example warcraft -train_dataset, test_dataset = dataset[1:45], dataset[46:50] -```` - -And getting an individual sample will return a [`DataSample`](@ref) with four fields: `x`, `instance`, `θ`, and `y`: - -````@example warcraft -sample = test_dataset[1] -```` - -`x` correspond to the input features, i.e. the input image (3D array) in the Warcraft benchmark case: - -````@example warcraft -x = sample.x -```` - -`θ_true` correspond to the true unknown terrain weights. We use the opposite of the true weights in order to formulate the optimization problem as a maximization problem: - -````@example warcraft -θ_true = sample.θ_true -```` - -`y_true` correspond to the optimal shortest path, encoded as a binary matrix: - -````@example warcraft -y_true = sample.y_true -```` - -`instance` is not used in this benchmark, therefore set to nothing: - -````@example warcraft -isnothing(sample.instance) -```` - -For some benchmarks, we provide the following plotting method [`plot_data`](@ref) to visualize the data: - -````@example warcraft -plot_data(b, sample) -```` - -We can see here the terrain image, the true terrain weights, and the true shortest path avoiding the high cost cells. - -## Building a pipeline - -DecisionFocusedLearningBenchmarks also provides methods to build an hybrid machine learning and combinatorial optimization pipeline for the benchmark. -First, the [`generate_statistical_model`](@ref) method generates a machine learning predictor to predict cell weights from the input image: - -````@example warcraft -model = generate_statistical_model(b) -```` - -In the case of the Warcraft benchmark, the model is a convolutional neural network built using the Flux.jl package. - -````@example warcraft -θ = model(x) -```` - -Note that the model is not trained yet, and its parameters are randomly initialized. - -Finally, the [`generate_maximizer`](@ref) method can be used to generate a combinatorial optimization algorithm that takes the predicted cell weights as input and returns the corresponding shortest path: - -````@example warcraft -maximizer = generate_maximizer(b; dijkstra=true) -```` - -In the case o fthe Warcraft benchmark, the method has an additional keyword argument to chose the algorithm to use: Dijkstra's algorithm or Bellman-Ford algorithm. - -````@example warcraft -y = maximizer(θ) -```` - -As we can see, currently the pipeline predicts random noise as cell weights, and therefore the maximizer returns a straight line path. 
- -````@example warcraft -plot_data(b, DataSample(; x, θ_true=θ, y_true=y)) -```` - -We can evaluate the current pipeline performance using the optimality gap metric: - -````@example warcraft -starting_gap = compute_gap(b, test_dataset, model, maximizer) -```` - -## Using a learning algorithm - -We can now train the model using the InferOpt.jl package: - -````@example warcraft -using InferOpt -using Flux -using Plots - -perturbed_maximizer = PerturbedMultiplicative(maximizer; ε=0.2, nb_samples=100) -loss = FenchelYoungLoss(perturbed_maximizer) - -starting_gap = compute_gap(b, test_dataset, model, maximizer) - -opt_state = Flux.setup(Adam(1e-3), model) -loss_history = Float64[] -for epoch in 1:50 - val, grads = Flux.withgradient(model) do m - sum(loss(m(x), y_true) for (; x, y_true) in train_dataset) / length(train_dataset) - end - Flux.update!(opt_state, model, grads[1]) - push!(loss_history, val) -end - -plot(loss_history; xlabel="Epoch", ylabel="Loss", title="Training loss") -```` - -````@example warcraft -final_gap = compute_gap(b, test_dataset, model, maximizer) -```` - -````@example warcraft -θ = model(x) -y = maximizer(θ) -plot_data(b, DataSample(; x, θ_true=θ, y_true=y)) -```` - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl index 2c61c5f..e3455bd 100644 --- a/src/DynamicAssortment/DynamicAssortment.jl +++ b/src/DynamicAssortment/DynamicAssortment.jl @@ -2,13 +2,13 @@ module DynamicAssortment using ..Utils -using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES +using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES, SIGNATURES using Distributions: Uniform, Categorical +using Flux: Chain, Dense using LinearAlgebra: dot using Random: Random, AbstractRNG, MersenneTwister using Statistics: mean -using Flux: Chain, Dense using Combinatorics: combinations """ @@ -18,6 +18,8 @@ Benchmark for the dynamic assortment problem. # Fields $TYPEDFIELDS + +Reference: """ struct DynamicAssortmentBenchmark{exogenous,M} <: AbstractDynamicBenchmark{exogenous} "customer choice model (price, hype, saturation, and features)" @@ -32,12 +34,29 @@ struct DynamicAssortmentBenchmark{exogenous,M} <: AbstractDynamicBenchmark{exoge max_steps::Int end +""" + DynamicAssortmentBenchmark(; + N=20, + d=2, + K=4, + max_steps=80, + customer_choice_model=Chain(Dense([-0.8 0.6 -0.4 0.3 0.5]), vec), + exogenous=false + ) + +Constructor for [`DynamicAssortmentBenchmark`](@ref). +By default, the benchmark has 20 items, feature dimension 2, assortment size 4, 80 steps per +episode, a simple linear customer choice model, and is endogenous. 
+""" + function DynamicAssortmentBenchmark(; N=20, d=2, K=4, max_steps=80, - customer_choice_model=Chain(Dense([-0.8 0.6 -0.4 0.3 0.5]), vec), + customer_choice_model=Chain( + Dense(hcat([-0.8 0.6 -0.4], reshape([0.3 + 0.2 * (i - 1) for i in 1:d], 1, d))), vec + ), exogenous=false, ) return DynamicAssortmentBenchmark{exogenous,typeof(customer_choice_model)}( @@ -45,32 +64,55 @@ function DynamicAssortmentBenchmark(; ) end -include("instance.jl") -include("environment.jl") -include("policies.jl") - +# Accessor functions customer_choice_model(b::DynamicAssortmentBenchmark) = b.customer_choice_model item_count(b::DynamicAssortmentBenchmark) = b.N feature_count(b::DynamicAssortmentBenchmark) = b.d assortment_size(b::DynamicAssortmentBenchmark) = b.K max_steps(b::DynamicAssortmentBenchmark) = b.max_steps +include("instance.jl") +include("environment.jl") +include("policies.jl") + +""" +$TYPEDSIGNATURES + +Outputs a data sample containing an [`Instance`](@ref). +""" function Utils.generate_sample( b::DynamicAssortmentBenchmark, rng::AbstractRNG=MersenneTwister(0) ) return DataSample(; instance=Instance(b, rng)) end +""" +$TYPEDSIGNATURES + +Generates a statistical model for the dynamic assortment benchmark. +The model is a small neural network with one hidden layer of size 5 and no activation function. +""" function Utils.generate_statistical_model(b::DynamicAssortmentBenchmark; seed=nothing) Random.seed!(seed) d = feature_count(b) return Chain(Dense(d + 8 => 5), Dense(5 => 1), vec) end +""" +$TYPEDSIGNATURES + +Outputs a top k maximizer, with k being the assortment size of the benchmark. +""" function Utils.generate_maximizer(b::DynamicAssortmentBenchmark) return TopKMaximizer(assortment_size(b)) end +""" +$TYPEDSIGNATURES + +Creates an [`Environment`](@ref) from an [`Instance`](@ref) of the dynamic assortment benchmark. +The seed of the environment is randomly generated using the provided random number generator. +""" function Utils.generate_environment( ::DynamicAssortmentBenchmark, instance::Instance, rng::AbstractRNG; kwargs... ) @@ -78,7 +120,14 @@ function Utils.generate_environment( return Environment(instance; seed) end -function Utils.generate_policies(b::DynamicAssortmentBenchmark) +""" +$TYPEDSIGNATURES + +Returns two policies for the dynamic assortment benchmark: +- `Greedy`: selects the assortment containing items with the highest prices +- `Expert`: selects the assortment with the highest expected revenue (through brute-force enumeration) +""" +function Utils.generate_policies(::DynamicAssortmentBenchmark) greedy = Policy( "Greedy", "policy that selects the assortment with items with the highest prices", @@ -93,5 +142,8 @@ function Utils.generate_policies(b::DynamicAssortmentBenchmark) end export DynamicAssortmentBenchmark +public generate_sample, generate_statistical_model, generate_maximizer +public generate_environment, generate_policies +public reset!, is_terminated, observe, step! end diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl index 8389a0c..603ca84 100644 --- a/src/DynamicAssortment/environment.jl +++ b/src/DynamicAssortment/environment.jl @@ -13,7 +13,7 @@ $TYPEDFIELDS "current step" step::Int "purchase history (used to update hype feature)" - purchase_hist::Vector{Int} + purchase_history::Vector{Int} "rng" rng::R "seed for RNG" @@ -26,6 +26,11 @@ $TYPEDFIELDS d_features::Matrix{Float64} end +""" +$TYPEDSIGNATURES + +Creates an [`Environment`](@ref) from an [`Instance`](@ref) of the dynamic assortment benchmark. 
+""" function Environment(instance::Instance; seed=0, rng::AbstractRNG=MersenneTwister(seed)) N = item_count(instance) (; prices, features, starting_hype_and_saturation) = instance @@ -36,7 +41,7 @@ function Environment(instance::Instance; seed=0, rng::AbstractRNG=MersenneTwiste env = Environment(; instance, step=1, - purchase_hist=Int[], + purchase_history=Int[], rng=rng, seed=seed, utility=model(full_features), @@ -47,57 +52,25 @@ function Environment(instance::Instance; seed=0, rng::AbstractRNG=MersenneTwiste return env end -Utils.get_seed(env::Environment) = env.seed -customer_choice_model(b::Environment) = customer_choice_model(b.instance) -item_count(b::Environment) = item_count(b.instance) -feature_count(b::Environment) = feature_count(b.instance) -assortment_size(b::Environment) = assortment_size(b.instance) -max_steps(b::Environment) = max_steps(b.instance) -prices(b::Environment) = b.instance.prices - -## Basic operations of environment - -# Reset the environment -function Utils.reset!(env::Environment; reset_rng=false, seed=env.seed) - reset_rng && Random.seed!(env.rng, seed) - - env.step = 1 - - (; prices, starting_hype_and_saturation, features) = env.instance - features = vcat( - reshape(prices[1:(end - 1)], 1, :), starting_hype_and_saturation, features - ) - env.features .= features - - env.d_features .= 0.0 - - model = customer_choice_model(env) - env.utility .= model(features) - - empty!(env.purchase_hist) - return nothing -end - -function Utils.is_terminated(env::Environment) - return env.step > max_steps(env) -end +customer_choice_model(env::Environment) = customer_choice_model(env.instance) +item_count(env::Environment) = item_count(env.instance) +feature_count(env::Environment) = feature_count(env.instance) +assortment_size(env::Environment) = assortment_size(env.instance) +max_steps(env::Environment) = max_steps(env.instance) +prices(env::Environment) = prices(env.instance) -function Utils.observe(env::Environment) - delta_features = env.features[2:3, :] .- env.instance.starting_hype_and_saturation - return vcat( - env.features, - env.d_features, - delta_features, - ones(1, item_count(env)) .* (env.step / max_steps(env) * 10), - ) ./ 10, - nothing -end +""" +$TYPEDSIGNATURES -# Compute the hype vector +Compute an hype multiplier vector based on the purchase history. +The hype multiplier (equal to 1 by default) for each item is updated as follows: +- If the item was purchased in the last step, its hype multiplier increases by 0.02. +- If the item was purchased in the last 2 to 5 steps, its hype multiplier decreases by 0.005. +""" function hype_update(env::Environment) N = item_count(env) hype_vector = ones(N) - hist = env.purchase_hist + hist = env.purchase_history # Define decay factors for each time step factors = [0.02, -0.005, -0.005, -0.005, -0.005] @@ -115,9 +88,13 @@ function hype_update(env::Environment) return hype_vector end -# Step function +""" +$TYPEDSIGNATURES + +Updates the environment state after a purchase of `item`. +""" function buy_item!(env::Environment, item::Int) - push!(env.purchase_hist, item) + push!(env.purchase_history, item) env.step += 1 if is_endogenous(env.instance.config) @@ -137,19 +114,108 @@ function buy_item!(env::Environment, item::Int) return nothing end -# Choice probabilities -function choice_probabilities(env::Environment, S) +""" +$TYPEDSIGNATURES + +Compute the choice probabilities for each item in `assortment`. 
+""" +function choice_probabilities(env::Environment, assortment::BitVector) N = item_count(env) θ = env.utility - exp_values = [exp(θ[i]) * S[i] for i in 1:N] + exp_values = [exp(θ[i]) * assortment[i] for i in 1:N] push!(exp_values, 1.0) # No purchase action denominator = sum(exp_values) probs = exp_values ./ denominator return probs end -# Purchase decision -function Utils.step!(env::Environment, assortment) +""" +$TYPEDSIGNATURES + +Compute the expected revenue of offering `assortment`. +""" +function compute_expected_revenue(env::Environment, assortment::BitVector) + r = prices(env) + probs = choice_probabilities(env, assortment) + expected_revenue = dot(probs, r) + return expected_revenue +end + +""" +$TYPEDSIGNATURES + +Outputs the seed of the environment. +""" +Utils.get_seed(env::Environment) = env.seed + +""" +$TYPEDSIGNATURES + +Resets the environment to the initial state: +- reset the rng if `reset_rng` is true +- reset the step to 1 +- reset the features to the initial features +- reset the change in features to zero +- reset the utility to the initial utility +- clear the purchase history +""" +function Utils.reset!(env::Environment; reset_rng=false, seed=env.seed) + reset_rng && Random.seed!(env.rng, seed) + + env.step = 1 + + (; prices, starting_hype_and_saturation, features) = env.instance + features = vcat( + reshape(prices[1:(end - 1)], 1, :), starting_hype_and_saturation, features + ) + env.features .= features + + env.d_features .= 0.0 + + model = customer_choice_model(env) + env.utility .= model(features) + + empty!(env.purchase_history) + return nothing +end + +""" +$TYPEDSIGNATURES + +Checks if the environment has reached the maximum number of steps. +""" +function Utils.is_terminated(env::Environment) + return env.step > max_steps(env) +end + +""" +$TYPEDSIGNATURES + +Features observed by the agent at current step, as a concatenation of: +- current full features (including prices, hype, saturation, and static features) +- change in hype and saturation features from the last step +- change in hype and saturation features from the starting state +- normalized current step (divided by max steps and multiplied by 10) +All features are normalized by dividing by 10. +""" +function Utils.observe(env::Environment) + delta_features = env.features[2:3, :] .- env.instance.starting_hype_and_saturation + return vcat( + env.features, + env.d_features, + delta_features, + ones(1, item_count(env)) .* (env.step / max_steps(env) * 10), + ) ./ 10, + nothing +end + +""" +$TYPEDSIGNATURES + +Performs one step in the environment given an assortment. +Draw an item according to the customer choice model and updates the environment state. +""" +function Utils.step!(env::Environment, assortment::BitVector) @assert !Utils.is_terminated(env) "Environment is terminated, cannot act!" 
r = prices(env) probs = choice_probabilities(env, assortment) @@ -158,12 +224,3 @@ function Utils.step!(env::Environment, assortment) buy_item!(env, item) return reward end - -## Solution functions -# enumerate all possible assortments of size K and return the best one -function compute_expected_revenue(env::Environment, S) - r = prices(env) - probs = choice_probabilities(env, S) - expected_revenue = dot(probs, r) - return expected_revenue -end diff --git a/src/DynamicAssortment/instance.jl b/src/DynamicAssortment/instance.jl index 3250cdd..f6c3c09 100644 --- a/src/DynamicAssortment/instance.jl +++ b/src/DynamicAssortment/instance.jl @@ -17,6 +17,14 @@ $TYPEDFIELDS starting_hype_and_saturation::Matrix{Float64} end +""" +$TYPEDSIGNATURES + +Generates a random instance: +- random prices uniformly in [1, 10] +- random features uniformly in [1, 10] +- random starting hype and saturation uniformly in [1, 10] +""" function Instance(b::DynamicAssortmentBenchmark, rng::AbstractRNG) N = item_count(b) d = feature_count(b) @@ -26,8 +34,10 @@ function Instance(b::DynamicAssortmentBenchmark, rng::AbstractRNG) return Instance(; config=b, prices, features, starting_hype_and_saturation) end +# Accessor functions customer_choice_model(b::Instance) = customer_choice_model(b.config) item_count(b::Instance) = item_count(b.config) feature_count(b::Instance) = feature_count(b.config) assortment_size(b::Instance) = assortment_size(b.config) max_steps(b::Instance) = max_steps(b.config) +prices(b::Instance) = b.prices diff --git a/src/DynamicAssortment/policies.jl b/src/DynamicAssortment/policies.jl index 320c501..5739584 100644 --- a/src/DynamicAssortment/policies.jl +++ b/src/DynamicAssortment/policies.jl @@ -1,3 +1,8 @@ +""" +$TYPEDSIGNATURES + +Expert policy that computes the optimal assortment by enumerating all possible assortments. +""" function expert_policy(env::Environment) N = item_count(env) K = assortment_size(env) @@ -15,7 +20,12 @@ function expert_policy(env::Environment) return best_S end +""" +$TYPEDSIGNATURES + +Greedy policy that selects the assortment containing items with the highest prices. 
+""" function greedy_policy(env::Environment) maximizer = generate_maximizer(env.instance.config) - return maximizer(prices(env)) + return maximizer(prices(env)[1:item_count(env)]) end diff --git a/src/DynamicVehicleScheduling/anticipative_solver.jl b/src/DynamicVehicleScheduling/anticipative_solver.jl index 5847808..e21ee53 100644 --- a/src/DynamicVehicleScheduling/anticipative_solver.jl +++ b/src/DynamicVehicleScheduling/anticipative_solver.jl @@ -75,14 +75,14 @@ function anticipative_solver( job_indices = 2:nb_nodes epoch_indices = T#first_epoch:last_epoch - @variable(model, y[i=1:nb_nodes, j=1:nb_nodes, t=epoch_indices]; binary=true) + @variable(model, y[i = 1:nb_nodes, j = 1:nb_nodes, t = epoch_indices]; binary=true) @objective( model, Max, sum( - -duration[i, j] * y[i, j, t] for i in 1:nb_nodes, j in 1:nb_nodes, - t in epoch_indices + -duration[i, j] * y[i, j, t] for + i in 1:nb_nodes, j in 1:nb_nodes, t in epoch_indices ) ) @@ -157,14 +157,12 @@ function anticipative_solver( routes = epoch_routes[i] epoch_customers = epoch_indices[epoch] - y_true = - VSPSolution( - Vector{Int}[ - map(idx -> findfirst(==(idx), epoch_customers), route) for - route in routes - ]; - max_index=length(epoch_customers), - ).edge_matrix + y_true = VSPSolution( + Vector{Int}[ + map(idx -> findfirst(==(idx), epoch_customers), route) for route in routes + ]; + max_index=length(epoch_customers), + ).edge_matrix location_indices = indices[epoch_customers] new_coordinates = env.instance.static_instance.coordinate[location_indices] @@ -188,7 +186,8 @@ function anticipative_solver( is_must_dispatch[2:end] .= true else is_must_dispatch[2:end] .= - planning_start_time .+ epoch_duration .+ @view(new_duration[1, 2:end]) .> new_start_time[2:end] + planning_start_time .+ epoch_duration .+ @view(new_duration[1, 2:end]) .> + new_start_time[2:end] end is_postponable[2:end] .= .!is_must_dispatch[2:end] diff --git a/src/DynamicVehicleScheduling/maximizer.jl b/src/DynamicVehicleScheduling/maximizer.jl index 450ab8a..65e8b60 100644 --- a/src/DynamicVehicleScheduling/maximizer.jl +++ b/src/DynamicVehicleScheduling/maximizer.jl @@ -93,7 +93,7 @@ function prize_collecting_vsp( nb_nodes = nv(graph) job_indices = 2:(nb_nodes) - @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0) + @variable(model, y[i = 1:nb_nodes, j = 1:nb_nodes; has_edge(graph, i, j)] >= 0) θ_ext = fill(0.0, location_count(instance)) # no prize for must dispatch requests, only hard constraints θ_ext[instance.is_postponable] .= θ @@ -129,9 +129,7 @@ end function oracle(θ; instance::DVSPState, kwargs...) routes = prize_collecting_vsp(θ; instance=instance, kwargs...) - return VSPSolution( - routes; max_index=location_count(instance.state_instance) - ).edge_matrix + return VSPSolution(routes; max_index=location_count(instance.state_instance)).edge_matrix end function g(y; instance, kwargs...) 
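Returning to the `greedy_policy` change earlier in this diff: `prices(env)` has length `N + 1` because the last entry is the zero price of the no-purchase option, so it must be truncated to the first `N` entries before being passed to the top-k maximizer. A small sketch of the difference, using `partialsortperm` as a stand-in for the package's `TopKMaximizer` (names and values here are illustrative only):

```julia
# N item prices plus a trailing 0.0 for the no-purchase option, as generated by Instance
prices = [4.0, 7.0, 2.5, 9.0, 0.0]
N, K = 4, 2

# Generic top-k selector returning a one-hot BitVector over its input length
top_k(scores, k) =
    BitVector([i in partialsortperm(scores, 1:k; rev=true) for i in eachindex(scores)])

top_k(prices, K)        # length 5: also reserves a slot for the no-purchase entry
top_k(prices[1:N], K)   # length 4: matches the assortment encoding expected by the environment
```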
diff --git a/src/StochasticVehicleScheduling/instance/instance.jl b/src/StochasticVehicleScheduling/instance/instance.jl index 090c69a..0c09e6f 100644 --- a/src/StochasticVehicleScheduling/instance/instance.jl +++ b/src/StochasticVehicleScheduling/instance/instance.jl @@ -39,8 +39,8 @@ function create_VSP_graph(city::City) job_tasks = 2:(city.nb_tasks + 1) travel_times = [ - distance(task1.end_point, task2.start_point) for task1 in city.tasks, - task2 in city.tasks + distance(task1.end_point, task2.start_point) for + task1 in city.tasks, task2 in city.tasks ] # Create existing edges diff --git a/src/StochasticVehicleScheduling/maximizer.jl b/src/StochasticVehicleScheduling/maximizer.jl index 66e9eb3..515d989 100644 --- a/src/StochasticVehicleScheduling/maximizer.jl +++ b/src/StochasticVehicleScheduling/maximizer.jl @@ -14,7 +14,7 @@ function vsp_maximizer( nb_nodes = nv(graph) job_indices = 2:(nb_nodes - 1) - @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)], Bin) + @variable(model, y[i = 1:nb_nodes, j = 1:nb_nodes; has_edge(graph, i, j)], Bin) @objective( model, diff --git a/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl b/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl index 49ae00c..87fe47b 100644 --- a/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl +++ b/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl @@ -11,8 +11,8 @@ function solve_deterministic_VSP( (; city, graph) = instance travel_times = [ - distance(task1.end_point, task2.start_point) for task1 in city.tasks, - task2 in city.tasks + distance(task1.end_point, task2.start_point) for + task1 in city.tasks, task2 in city.tasks ] model = model_builder() @@ -21,7 +21,7 @@ function solve_deterministic_VSP( nb_nodes = nv(graph) job_indices = 2:(nb_nodes - 1) - @variable(model, x[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)], Bin) + @variable(model, x[i = 1:nb_nodes, j = 1:nb_nodes; has_edge(graph, i, j)], Bin) @objective( model, diff --git a/src/gurobi_setup.jl b/src/gurobi_setup.jl index c618ce4..847df66 100644 --- a/src/gurobi_setup.jl +++ b/src/gurobi_setup.jl @@ -7,6 +7,8 @@ const GRB_ENV = Ref{Gurobi.Env}() GRB_ENV[] = Gurobi.Env() export GRB_ENV +@info "You can now use `grb_model()` as a model builder in JuMP." 
+ """ $TYPEDSIGNATURES diff --git a/test/code.jl b/test/code.jl index cd2fb64..ad2e5f7 100644 --- a/test/code.jl +++ b/test/code.jl @@ -14,7 +14,9 @@ end @testitem "JuliaFormatter" begin using JuliaFormatter - JuliaFormatter.format(DecisionFocusedLearningBenchmarks; verbose=false, overwrite=false) + @test JuliaFormatter.format( + DecisionFocusedLearningBenchmarks; verbose=false, overwrite=false + ) end @testitem "Documenter" begin diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl index 54030fe..4d9db08 100644 --- a/test/dynamic_assortment.jl +++ b/test/dynamic_assortment.jl @@ -1,32 +1,338 @@ -@testitem "dynamic Assortment" begin - using DecisionFocusedLearningBenchmarks - using Statistics: mean +@testsnippet DAPSetup begin + const DAP = DecisionFocusedLearningBenchmarks.DynamicAssortment +end +@testitem "DynamicAssortment - Benchmark Construction" setup=[Imports, DAPSetup] begin + # Test default constructor b = DynamicAssortmentBenchmark() - + @test b.N == 20 + @test b.d == 2 + @test b.K == 4 + @test b.max_steps == 80 @test is_endogenous(b) @test !is_exogenous(b) - dataset = generate_dataset(b, 10; seed=0) - environments = generate_environments(b, dataset) + # Test custom constructor + b_custom = DynamicAssortmentBenchmark(N=10, d=3, K=2, max_steps=50, exogenous=true) + @test b_custom.N == 10 + @test b_custom.d == 3 + @test b_custom.K == 2 + @test b_custom.max_steps == 50 + @test !is_endogenous(b_custom) + @test is_exogenous(b_custom) - env = environments[1] - get_seed(env) - env.seed + # Test accessor functions + @test DAP.item_count(b) == 20 + @test DAP.feature_count(b) == 2 + @test DAP.assortment_size(b) == 4 + @test DAP.max_steps(b) == 80 +end + +@testitem "DynamicAssortment - Instance Generation" setup=[Imports, DAPSetup] begin + b = DynamicAssortmentBenchmark(N=5, d=3, K=2) + rng = MersenneTwister(42) + + instance = DAP.Instance(b, rng) + + # Test instance structure + @test length(instance.prices) == 6 # N items + 1 no-purchase action + @test instance.prices[end] == 0.0 # No-purchase action has price 0 + @test all(1.0 ≤ p ≤ 10.0 for p in instance.prices[1:(end - 1)]) # Prices in [1, 10] + + @test size(instance.features) == (3, 5) # (d, N) + @test all(1.0 ≤ f ≤ 10.0 for f in instance.features) # Features in [1, 10] + + @test size(instance.starting_hype_and_saturation) == (2, 5) # (2, N) + @test all(1.0 ≤ f ≤ 10.0 for f in instance.starting_hype_and_saturation) + + # Test accessor functions + @test DAP.item_count(instance) == 5 + @test DAP.feature_count(instance) == 3 + @test DAP.assortment_size(instance) == 2 + @test DAP.max_steps(instance) == 80 + @test DAP.prices(instance) == instance.prices +end + +@testitem "DynamicAssortment - Environment Initialization" setup=[Imports, DAPSetup] begin + b = DynamicAssortmentBenchmark(N=5, d=2, K=2, max_steps=10) + instance = DAP.Instance(b, MersenneTwister(42)) + + env = DAP.Environment(instance; seed=123) + + # Test initial state + @test env.step == 1 + @test isempty(env.purchase_history) + @test env.seed == 123 + @test !is_terminated(env) + + # Test features structure: [prices; hype_saturation; static_features] + @test size(env.features) == (5, 5) # (1 + 2 + d, N) = (1 + 2 + 2, 5) + @test env.features[1, :] == instance.prices[1:(end - 1)] # First row is prices (excluding no-purchase) + + # Test utility computation + @test length(env.utility) == 5 # One utility per item + + # Test accessor functions + @test DAP.item_count(env) == 5 + @test DAP.feature_count(env) == 2 + @test DAP.assortment_size(env) == 2 + @test 
DAP.max_steps(env) == 10 + @test DAP.prices(env) == instance.prices +end + +@testitem "DynamicAssortment - Environment Reset" setup=[Imports, DAPSetup] begin + b = DynamicAssortmentBenchmark(N=3, d=1, K=2, max_steps=5) + instance = DAP.Instance(b, MersenneTwister(42)) + env = DAP.Environment(instance; seed=123) + + # Modify environment state + env.step = 3 + push!(env.purchase_history, 1, 2) + env.features[2, 1] *= 1.5 # Modify hype + + # Reset environment + reset!(env) + + # Check reset state + @test env.step == 1 + @test isempty(env.purchase_history) + @test all(env.d_features .== 0.0) + + # Features should be reset to initial values + expected_features = vcat( + reshape(instance.prices[1:(end - 1)], 1, :), + instance.starting_hype_and_saturation, + instance.features, + ) + @test env.features ≈ expected_features +end + +@testitem "DynamicAssortment - Hype Update Logic" setup=[Imports, DAPSetup] begin + b = DynamicAssortmentBenchmark(N=5, d=1, K=2) + instance = DAP.Instance(b, MersenneTwister(42)) + env = DAP.Environment(instance; seed=123) + + # Test hype update with no history + hype = DAP.hype_update(env) + @test all(hype .== 1.0) # Should be all ones with no history + + # Test hype update with recent purchase + push!(env.purchase_history, 2) # Purchase item 2 + hype = DAP.hype_update(env) + @test hype[2] ≈ 1.02 # Should increase by 0.02 + @test all(hype[i] == 1.0 for i in [1, 3, 4, 5]) # Others unchanged + + # Test hype update with older purchases + push!(env.purchase_history, 3, 2, 1) # More purchases + hype = DAP.hype_update(env) + @test hype[1] ≈ 1.02 + @test hype[2] ≈ 0.99 + @test hype[3] ≈ 0.995 + + # Test with no-purchase action (item > N) + env.purchase_history = [6] # No-purchase action + hype = DAP.hype_update(env) + @test all(hype .== 1.0) # Should not affect any item hype +end + +@testitem "DynamicAssortment - Choice Probabilities" setup=[Imports, DAPSetup] begin + b = DynamicAssortmentBenchmark(N=3, d=1, K=2) + instance = DAP.Instance(b, MersenneTwister(42)) + env = DAP.Environment(instance; seed=123) + + # Test with full assortment + assortment = trues(3) + probs = DAP.choice_probabilities(env, assortment) + + @test length(probs) == 4 # 3 items + no-purchase + @test sum(probs) ≈ 1.0 # Probabilities sum to 1 + @test all(probs .≥ 0.0) # All probabilities non-negative + + # Test with partial assortment + assortment = falses(3) + assortment[1] = true + assortment[3] = true + probs = DAP.choice_probabilities(env, assortment) + + @test probs[2] == 0.0 # Item 2 not in assortment, so probability 0 + @test probs[1] > 0.0 # Item 1 in assortment + @test probs[3] > 0.0 # Item 3 in assortment + @test probs[4] > 0.0 # No-purchase always available + @test sum(probs) ≈ 1.0 + + # Test empty assortment + assortment = falses(3) + probs = DAP.choice_probabilities(env, assortment) + @test all(probs[1:3] .== 0.0) # No items available + @test probs[4] ≈ 1.0 # Only no-purchase available +end + +@testitem "DynamicAssortment - Expected Revenue" setup=[Imports, DAPSetup] begin + b = DynamicAssortmentBenchmark(N=3, d=1, K=2) + instance = DAP.Instance(b, MersenneTwister(42)) + env = DAP.Environment(instance; seed=123) + + # Test with full assortment + assortment = trues(3) + revenue = DAP.compute_expected_revenue(env, assortment) + @test revenue ≥ 0.0 + + # Test with empty assortment + assortment = falses(3) + revenue = DAP.compute_expected_revenue(env, assortment) + @test revenue == 0.0 # Only no-purchase available with price 0 +end + +@testitem "DynamicAssortment - Environment Step" setup=[Imports, 
DAPSetup] begin + b = DynamicAssortmentBenchmark(N=3, d=1, K=2, max_steps=5) + instance = DAP.Instance(b, MersenneTwister(42)) + env = DAP.Environment(instance; seed=123) + + initial_step = env.step + assortment = trues(3) + + # Take a step + reward = step!(env, assortment) + + # Check step progression + @test env.step == initial_step + 1 + @test length(env.purchase_history) == 1 + @test reward ≥ 0.0 # Reward should be non-negative (price or 0) + + # Check reward is valid price + purchased_item = env.purchase_history[1] + if purchased_item <= 3 + @test reward == instance.prices[purchased_item] + else + @test reward == 0.0 # No-purchase action + end + + # Test termination + for _ in 1:(DAP.max_steps(env) - 1) + if !is_terminated(env) + step!(env, assortment) + end + end + @test is_terminated(env) + + # Test error on terminated environment + @test_throws AssertionError step!(env, assortment) +end + +@testitem "DynamicAssortment - Endogenous vs Exogenous" setup=[Imports, DAPSetup] begin + # Test endogenous environment (features change with purchases) + b_endo = DynamicAssortmentBenchmark(N=3, d=1, K=2, exogenous=false) + instance_endo = DAP.Instance(b_endo, MersenneTwister(42)) + env_endo = DAP.Environment(instance_endo; seed=123) + + initial_features_endo = copy(env_endo.features) + DAP.buy_item!(env_endo, 1) + + @test env_endo.features != initial_features_endo # Features should change + @test any(env_endo.d_features .!= 0.0) # Delta features should be non-zero + + # Test exogenous environment (features don't change with purchases) + b_exo = DynamicAssortmentBenchmark(N=3, d=1, K=2, exogenous=true) + instance_exo = DAP.Instance(b_exo, MersenneTwister(42)) + env_exo = DAP.Environment(instance_exo; seed=123) + + initial_features_exo = copy(env_exo.features) + DAP.buy_item!(env_exo, 1) + + @test env_exo.features == initial_features_exo # Features should not change + @test all(env_exo.d_features .== 0.0) # Delta features should remain zero +end + +@testitem "DynamicAssortment - Observation" setup=[Imports, DAPSetup] begin + b = DynamicAssortmentBenchmark(N=3, d=2, max_steps=10) + instance = DAP.Instance(b, MersenneTwister(42)) + env = DAP.Environment(instance; seed=123) + + obs, info = observe(env) + + # Check observation dimensions: (d + 8, N) + # Features: prices(1) + hype_sat(2) + static(d) + d_features(2) + delta_features(2) + step(1) + expected_rows = 2 + 8 # d + 8 where d=2 + @test size(obs) == (expected_rows, 3) + @test info === nothing + + @test all(-1.0 ≤ x ≤ 1.0 for x in obs) + + # Test observation changes with step + obs1, _ = observe(env) + DAP.buy_item!(env, 1) + obs2, _ = observe(env) + @test obs1 != obs2 # Observations should differ after purchase +end + +@testitem "DynamicAssortment - Policies" setup=[Imports, DAPSetup] begin + using Statistics: mean + + b = DynamicAssortmentBenchmark(N=5, d=2, K=3, max_steps=20) + + # Generate test data + dataset = generate_dataset(b, 5; seed=0) + environments = generate_environments(b, dataset) + + # Get policies policies = generate_policies(b) expert = policies[1] greedy = policies[2] + @test expert.name == "Expert" + @test greedy.name == "Greedy" + + # Test policy evaluation r_expert, d = evaluate_policy!(expert, environments) r_greedy, _ = evaluate_policy!(greedy, environments) - @test mean(r_expert) >= mean(r_greedy) + @test length(r_expert) == length(environments) + @test length(r_greedy) == length(environments) + @test all(r_expert .≥ 0.0) + @test all(r_greedy .≥ 0.0) + + # Expert should generally outperform greedy (or at least not be 
worse on average) + @test mean(r_expert) ≥ mean(r_greedy) + + # Test policy output format + env = environments[1] + reset!(env) - model = generate_statistical_model(b) + expert_action = expert(env) + greedy_action = greedy(env) + @test length(expert_action) == DAP.item_count(env) + @test length(greedy_action) == DAP.item_count(env) + @test sum(expert_action) == DAP.assortment_size(env) + @test sum(greedy_action) == DAP.assortment_size(env) +end + +@testitem "DynamicAssortment - Model and Maximizer Integration" setup=[Imports, DAPSetup] begin + b = DynamicAssortmentBenchmark(N=4, d=3, K=2) + + # Test statistical model generation + model = generate_statistical_model(b; seed=42) + # Test maximizer generation maximizer = generate_maximizer(b) - sample = d[1] + + # Test integration with sample data + sample = generate_sample(b, MersenneTwister(42)) + @test hasfield(typeof(sample), :instance) + + dataset = generate_dataset(b, 3; seed=42) + environments = generate_environments(b, dataset) + + # Evaluate policy to get data samples + policies = generate_policies(b) + _, data_samples = evaluate_policy!(policies[1], environments) + + # Test model-maximizer pipeline + sample = data_samples[1] x = sample.x θ = model(x) y = maximizer(θ) + + @test length(θ) == DAP.item_count(b) + @test length(y) == DAP.item_count(b) + @test sum(y) == DAP.assortment_size(b) end diff --git a/test/runtests.jl b/test/runtests.jl index e73befd..2b90cad 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,3 +1,8 @@ using TestItemRunner +@testsnippet Imports begin + using DecisionFocusedLearningBenchmarks + using Random +end + @run_package_tests verbose = true
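Since the tests are now split into named `@testitem`s, individual groups can be run in isolation. A possible invocation, assuming TestItemRunner's documented `filter` keyword (the filename predicate below is illustrative):

```julia
using TestItemRunner

# Run only the dynamic assortment test items from this PR
@run_package_tests filter = ti -> occursin("dynamic_assortment", ti.filename)
```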