Skip to content

Commit c9b4280

Browse files
committed
checkpointing
1 parent fed0d66 commit c9b4280

File tree

3 files changed

+105
-70
lines changed

3 files changed

+105
-70
lines changed

docsrc/users-guide/examples/MCMC Sampling.ipynb

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@
8080
"name": "stdout",
8181
"output_type": "stream",
8282
"text": [
83-
"Enabling notebook extension jupyter-js-widgets/extension...\r\n",
84-
" - Validating: \u001b[32mOK\u001b[0m\r\n"
83+
"Enabling notebook extension jupyter-js-widgets/extension...\n",
84+
" - Validating: \u001b[32mOK\u001b[0m\n"
8585
]
8686
}
8787
],
@@ -119,7 +119,16 @@
119119
"cell_type": "code",
120120
"execution_count": 2,
121121
"metadata": {},
122-
"outputs": [],
122+
"outputs": [
123+
{
124+
"name": "stderr",
125+
"output_type": "stream",
126+
"text": [
127+
"18:01:39 - cmdstanpy - INFO - compiling stan file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/bernoulli.stan to exe file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/bernoulli\n",
128+
"18:01:49 - cmdstanpy - INFO - compiled model executable: /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/bernoulli\n"
129+
]
130+
}
131+
],
123132
"source": [
124133
"import os\n",
125134
"from cmdstanpy import CmdStanModel\n",
@@ -146,13 +155,13 @@
146155
"name": "stderr",
147156
"output_type": "stream",
148157
"text": [
149-
"17:32:09 - cmdstanpy - INFO - CmdStan start processing\n"
158+
"18:01:49 - cmdstanpy - INFO - CmdStan start processing\n"
150159
]
151160
},
152161
{
153162
"data": {
154163
"application/vnd.jupyter.widget-view+json": {
155-
"model_id": "651da295dcfe4ee2837f9d61d1371b1c",
164+
"model_id": "a37079ab4f4a4d859fb352e4196a89be",
156165
"version_major": 2,
157166
"version_minor": 0
158167
},
@@ -166,7 +175,7 @@
166175
{
167176
"data": {
168177
"application/vnd.jupyter.widget-view+json": {
169-
"model_id": "75265e5c3d1f4f028ef623cbbaf0c07c",
178+
"model_id": "bdabe917156047c5b4e75d4438fa0fac",
170179
"version_major": 2,
171180
"version_minor": 0
172181
},
@@ -180,7 +189,7 @@
180189
{
181190
"data": {
182191
"application/vnd.jupyter.widget-view+json": {
183-
"model_id": "763a5fa099bf4b178791a59fde6e1a9d",
192+
"model_id": "a3d2c041ae344ecabbbab0904b41d35e",
184193
"version_major": 2,
185194
"version_minor": 0
186195
},
@@ -194,7 +203,7 @@
194203
{
195204
"data": {
196205
"application/vnd.jupyter.widget-view+json": {
197-
"model_id": "05091cf113994122bf42b361c9011d5b",
206+
"model_id": "d067c2c5b0d2418ca779ba0ef4c1e074",
198207
"version_major": 2,
199208
"version_minor": 0
200209
},
@@ -216,7 +225,7 @@
216225
"name": "stderr",
217226
"output_type": "stream",
218227
"text": [
219-
"17:32:09 - cmdstanpy - INFO - CmdStan done processing.\n"
228+
"18:01:50 - cmdstanpy - INFO - CmdStan done processing.\n"
220229
]
221230
},
222231
{
@@ -254,12 +263,15 @@
254263
"text/plain": [
255264
"CmdStanMCMC: model=bernoulli chains=4['method=sample', 'algorithm=hmc', 'adapt', 'engaged=1']\n",
256265
" csv_files:\n",
257-
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp86q_w0yg/bernoullibqw94rqo/bernoulli-20220626173209_1.csv\n",
258-
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp86q_w0yg/bernoullibqw94rqo/bernoulli-20220626173209_2.csv\n",
259-
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp86q_w0yg/bernoullibqw94rqo/bernoulli-20220626173209_3.csv\n",
260-
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp86q_w0yg/bernoullibqw94rqo/bernoulli-20220626173209_4.csv\n",
266+
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_1.csv\n",
267+
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_2.csv\n",
268+
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_3.csv\n",
269+
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_4.csv\n",
261270
" output_files:\n",
262-
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp86q_w0yg/bernoullibqw94rqo/bernoulli-20220626173209-stdout.txt"
271+
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_0-stdout.txt\n",
272+
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_1-stdout.txt\n",
273+
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_2-stdout.txt\n",
274+
"\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_3-stdout.txt"
263275
]
264276
},
265277
"execution_count": 4,
@@ -271,6 +283,26 @@
271283
"fit"
272284
]
273285
},
286+
{
287+
"cell_type": "code",
288+
"execution_count": 25,
289+
"metadata": {},
290+
"outputs": [
291+
{
292+
"data": {
293+
"text/plain": [
294+
"dict_keys(['lp__', 'accept_stat__', 'stepsize__', 'treedepth__', 'n_leapfrog__', 'divergent__', 'energy__'])"
295+
]
296+
},
297+
"execution_count": 25,
298+
"metadata": {},
299+
"output_type": "execute_result"
300+
}
301+
],
302+
"source": [
303+
"fit.method_variables().keys()"
304+
]
305+
},
274306
{
275307
"cell_type": "markdown",
276308
"metadata": {},

docsrc/users-guide/hello_world.rst

Lines changed: 38 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -105,65 +105,64 @@ By default, the `sample` method runs 4 sampler chains.
105105
fit = model.sample(data=data_file)
106106
107107
108+
*Note*: this model can also be fit using other methods:
109+
110+
+ the :meth:`~CmdStanModel.variational` method does approximate Bayesian inference and returns a :class:`CmdStanVB` object
111+
+ the :meth:`~CmdStanModel.optimize` method does maximum likelihood estimation and returns a :class:`CmdStanMLE` object
112+
108113
Accessing the results
109114
^^^^^^^^^^^^^^^^^^^^^
110115

111-
The ``sample`` method returns a :class:`CmdStanMCMC` object,
112-
which provides access to the information from the Stan CSV files.
113-
The CSV header and data rows contain the outputs from each iteration of the sampler.
114-
CSV comment blocks are used to report the inference engine configuration and timing information.
115-
The NUTS-HMC adaptive sampler algorithm also outputs the per-chain HMC tuning parameters step_size and metric.
116+
The sampler outputs are the set of per-chain
117+
`Stan CSV files <https://mc-stan.org/docs/cmdstan-guide/stan-csv.html>`_,
118+
a non-standard CSV file format.
119+
Each data row of the Stan CSV file contains the per-iteration estimate of the Stan model
120+
parameters, transformed parameters, and generated quantities variables.
121+
Container variables, i.e., vector, row-vector, matrix, and array variables
122+
are necessarily serialized into a single row's worth of data.
123+
The output objects parse the set of Stan CSV files into a set of in-memory data structures
124+
and provide accessor functions for all the estimates and metadata.
125+
CmdStanPy makes a distinction between the per-iteration model outputs
126+
and the per-iteration algorithm outputs: the former are 'stan_variables'
127+
and the latter are 'method_variables'.
116128

117-
The ``CmdStanMCMC`` object parses the set of Stan CSV files into separate in-memory data structures for
118-
the set of sampler iterations, the metadata, and the step_size and metric and provides accessor methods for each.
119-
The primary object of interest are the draws from all iterations of the sampler, i.e., the CSV data rows.
120-
The ``CmdStanMCMC`` methods allow the user to extract the sample in whatever data format is needed for their analysis.
121-
The sample can be extracted in tabular format, either as
129+
The :class:`CmdStanMCMC` object provides the following accessor methods:
122130

123-
+ a numpy.ndarray: :meth:`~CmdStanMCMC.draws`
131+
+ :meth:`~CmdStanMCMC.stan_variable`: returns a numpy.ndarray whose structure corresponds to the Stan program variable structure
124132

125-
+ a pandas.DataFrame: :meth:`~CmdStanMCMC.draws_pd`
133+
+ :meth:`~CmdStanMCMC.stan_variables`: returns a Python dictionary mapping the Stan program variable names to the corresponding numpy.ndarray.
126134

127-
.. ipython:: python
135+
+ :meth:`~CmdStanMCMC.draws`: returns a numpy.ndarray which is either a 3-D array draws X chains X CSV columns,
136+
or a 2-D array draws X columns, where the draws from all chains are concatenated along the first dimension.
137+
The argument ``vars`` can be used to restrict this to just the columns for one or more variables.
128138

129-
print(fit.draws().shape)
130-
print(fit.draws(concat_chains=True).shape)
131-
fit.draws_pd()
139+
+ :meth:`~CmdStanMCMC.draws_pd`: returns a pandas.DataFrame over all columns in the Stan CSV file.
140+
The argument ``vars`` can be used to restrict this to one or more variables.
132141

133-
The sample can be treated as a collection of named, structured variables.
134-
CmdStanPy makes a distinction between the per-iteration model outputs
135-
and the per-iteration algorithm outputs: the former are 'stan_variables'
136-
and the information reported by the sampler are 'method_variables'.
137-
Accessor functions extract these as:
142+
+ :meth:`~CmdStanMCMC.draws_xr`: returns an xarray.Dataset which maps model variable names to their respective values.
143+
The argument ``vars`` can be used to restrict this to one or more variables.
138144

139-
+ a structured numpy.ndarray: :meth:`~CmdStanMCMC.stan_variable`
140-
which contains the set of all draws in the sample for the named Stan program variable.
141-
The draws from all chains are flattened, i.e.,
142-
the first ndarray dimension is the number of draws X number of chains.
143-
The remaining ndarray dimensions correspond to the Stan program variable dimension.
144-
145-
+ an xarray.Dataset: :meth:`~CmdStanMCMC.draws_xr`
146-
147-
+ a Python dict mapping Stan variable names to numpy.ndarray objects, where the
148-
chains are flattened, as above:
149-
:meth:`~CmdStanMCMC.stan_variables`.
150-
151-
+ a Python dict mapping the algorithm outputs to numpy.ndarray objects.
152-
Because these outputs are used for within-chain and cross-chain diagnostics,
153-
they are not flattened.
154-
:meth:`~CmdStanMCMC.stan_variables`.
145+
+ :meth:`~CmdStanMCMC.method_variables`: returns a Python dictionary over the sampler diagnostic/information output columns
146+
which by convention end in ``__``, e.g., ``lp__``.
155147

156148

157149
.. ipython:: python
158150
151+
# access model variable by name
159152
print(fit.stan_variable('theta'))
153+
print(fit.draws_pd('theta')[:3])
160154
print(fit.draws_xr('theta'))
155+
# access all model variables
161156
for k, v in fit.stan_variables().items():
162157
print(f'{k}\t{v.shape}')
158+
# access the sampler method variables
163159
for k, v in fit.method_variables().items():
164160
print(f'{k}\t{v.shape}')
161+
# access all Stan CSV file columns
162+
print(f'numpy.ndarray of draws: {fit.draws().shape}')
163+
fit.draws_pd()
165164
166-
165+
167166
In addition to the MCMC sample itself, the CmdStanMCMC object provides
168167
access to the per-chain HMC tuning parameters from the NUTS-HMC adaptive sampler,
169168
(if present).
@@ -175,7 +174,6 @@ access to the the per-chain HMC tuning parameters from the NUTS-HMC adaptive sam
175174
print(fit.step_size)
176175
177176
178-
179177
The CmdStanMCMC object also provides access to metadata about the model and the sampler run.
180178

181179
.. ipython:: python
@@ -189,6 +187,7 @@ The CmdStanMCMC object also provides access to metadata about the model and the
189187
190188
191189
190+
192191
CmdStan utilities: ``stansummary``, ``diagnose``
193192
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
194193

docsrc/users-guide/workflow.rst

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -193,26 +193,30 @@ the object's :attr:`~CmdStanMCMC.metadata` property.
193193
Output data
194194
-----------
195195

196-
The CSV data is assembled into the inference result object.
197-
CmdStanPy provides accessor methods which return this information
196+
The resulting Stan CSV file or set of files are assembled into an inference result object.
197+
198+
+ :class:`CmdStanMCMC` object contains the :meth:`~CmdStanModel.sample` outputs
199+
+ :class:`CmdStanVB` object contains the :meth:`~CmdStanModel.variational` outputs
200+
+ :class:`CmdStanMLE` object contains the :meth:`~CmdStanModel.optimize` outputs
201+
+ :class:`CmdStanGQ` object contains the :meth:`~CmdStanModel.generate_quantities` outputs
202+
203+
204+
The objects provide accessor methods which return this information
198205
either as columnar data (i.e., in terms of the CSV file columns),
199206
or as method and model variables.
200207

201-
The :meth:`~CmdStanMCMC.draws` and :meth:`~CmdStanMCMC.draws_pd` methods
202-
for both :class:`CmdStanMCMC` and :class:`CmdStanGQ` return the sample contents
203-
in columnar format, as a numpy.ndarray or pandas.DataFrame, respectively. Similarly,
204-
the :meth:`~CmdStanMCMC.draws_xr` method of these two objects returns the sample
205-
contents as an :py:class:`xarray.Dataset` which maps the method and model variable
206-
names to their respective values.
208+
The ``stan_variables`` method returns a Python dict over all Stan model variables,
209+
see :meth:`~CmdStanMCMC.stan_variables`.
210+
211+
The ``stan_variable`` method returns a single model variable as a numpy.ndarray object
212+
with the same structure (per draw) as the Stan program variable,
213+
see :meth:`~CmdStanMCMC.stan_variable`.
207214

208-
The :meth:`~CmdStanMCMC.method_variables` method returns a Python dict over all inference
209-
method variables.
215+
The ``method_variables`` method returns a Python dict over all inference
216+
method variables, see :meth:`~CmdStanMCMC.method_variables`.
210217

211-
All inference objects expose the following methods:
212218

213-
The :meth:`~CmdStanMCMC.stan_variable` method to returns a numpy.ndarray object
214-
which contains the set of all draws in the sample for the named Stan program variable.
215-
The draws from all chains are flattened into a single drawset.
216-
The first ndarray dimension is the number of draws X number of chains.
217-
The remaining ndarray dimensions correspond to the Stan program variable dimension.
218-
The :meth:`~CmdStanMCMC.stan_variables` method returns a Python dict over all Stan model variables.
219+
The :class:`CmdStanMCMC` and :class:`CmdStanGQ` objects return the sample contents
220+
in tabular form, see :meth:`~CmdStanMCMC.draws` and :meth:`~CmdStanMCMC.draws_pd`.
221+
Similarly, the :meth:`~CmdStanMCMC.draws_xr` method returns the sample
222+
contents as an :py:class:`xarray.Dataset` which is a mapping from variable names to their respective values.

0 commit comments

Comments
 (0)