checkpointing

mitzimorris · mitzimorris · commit c9b42808d114 · 2022-08-15T19:14:36.000-04:00
diff --git a/docsrc/users-guide/examples/MCMC Sampling.ipynb b/docsrc/users-guide/examples/MCMC Sampling.ipynb
@@ -80,8 +80,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Enabling notebook extension jupyter-js-widgets/extension...\r\n",
-      "      - Validating: \u001b[32mOK\u001b[0m\r\n"
+      "Enabling notebook extension jupyter-js-widgets/extension...\n",
+      "      - Validating: \u001b[32mOK\u001b[0m\n"
      ]
     }
    ],
@@ -119,7 +119,16 @@
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "18:01:39 - cmdstanpy - INFO - compiling stan file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/bernoulli.stan to exe file /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/bernoulli\n",
+      "18:01:49 - cmdstanpy - INFO - compiled model executable: /Users/mitzi/github/stan-dev/cmdstanpy/docsrc/users-guide/examples/bernoulli\n"
+     ]
+    }
+   ],
    "source": [
     "import os\n",
     "from cmdstanpy import CmdStanModel\n",
@@ -146,13 +155,13 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "17:32:09 - cmdstanpy - INFO - CmdStan start processing\n"
+      "18:01:49 - cmdstanpy - INFO - CmdStan start processing\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "651da295dcfe4ee2837f9d61d1371b1c",
+       "model_id": "a37079ab4f4a4d859fb352e4196a89be",
        "version_major": 2,
        "version_minor": 0
       },
@@ -166,7 +175,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "75265e5c3d1f4f028ef623cbbaf0c07c",
+       "model_id": "bdabe917156047c5b4e75d4438fa0fac",
        "version_major": 2,
        "version_minor": 0
       },
@@ -180,7 +189,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "763a5fa099bf4b178791a59fde6e1a9d",
+       "model_id": "a3d2c041ae344ecabbbab0904b41d35e",
        "version_major": 2,
        "version_minor": 0
       },
@@ -194,7 +203,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "05091cf113994122bf42b361c9011d5b",
+       "model_id": "d067c2c5b0d2418ca779ba0ef4c1e074",
        "version_major": 2,
        "version_minor": 0
       },
@@ -216,7 +225,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "17:32:09 - cmdstanpy - INFO - CmdStan done processing.\n"
+      "18:01:50 - cmdstanpy - INFO - CmdStan done processing.\n"
      ]
     },
     {
@@ -254,12 +263,15 @@
       "text/plain": [
        "CmdStanMCMC: model=bernoulli chains=4['method=sample', 'algorithm=hmc', 'adapt', 'engaged=1']\n",
        " csv_files:\n",
-       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp86q_w0yg/bernoullibqw94rqo/bernoulli-20220626173209_1.csv\n",
-       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp86q_w0yg/bernoullibqw94rqo/bernoulli-20220626173209_2.csv\n",
-       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp86q_w0yg/bernoullibqw94rqo/bernoulli-20220626173209_3.csv\n",
-       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp86q_w0yg/bernoullibqw94rqo/bernoulli-20220626173209_4.csv\n",
+       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_1.csv\n",
+       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_2.csv\n",
+       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_3.csv\n",
+       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_4.csv\n",
        " output_files:\n",
-       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp86q_w0yg/bernoullibqw94rqo/bernoulli-20220626173209-stdout.txt"
+       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_0-stdout.txt\n",
+       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_1-stdout.txt\n",
+       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_2-stdout.txt\n",
+       "\t/var/folders/db/4jnggnf549s42z50bd61jskm0000gq/T/tmp6qj5nilv/bernoullibxif2srf/bernoulli-20220815180149_3-stdout.txt"
       ]
      },
      "execution_count": 4,
@@ -271,6 +283,26 @@
     "fit"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['lp__', 'accept_stat__', 'stepsize__', 'treedepth__', 'n_leapfrog__', 'divergent__', 'energy__'])"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "fit.method_variables().keys()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/docsrc/users-guide/hello_world.rst b/docsrc/users-guide/hello_world.rst
@@ -105,65 +105,64 @@ By default, the `sample` method runs 4 sampler chains.
     fit = model.sample(data=data_file)
 
 
+*Note*: this model can also be fit using other methods:
+
++ the :meth:`~CmdStanModel.variational` method does approximate Bayesian inference and returns a :class:`CmdStanVB` object
++ the :meth:`~CmdStanModel.optimize` method does maximum likelihood estimation and returns a :class:`CmdStanMLE` object
+
 Accessing the results
 ^^^^^^^^^^^^^^^^^^^^^
 
-The ``sample`` method returns a :class:`CmdStanMCMC` object,
-which provides access to the information from the Stan CSV files.
-The CSV header and data rows contain the outputs from each iteration of the sampler.
-CSV comment blocks are used to report the inference engine configuration and timing information.
-The NUTS-HMC adaptive sampler algorithm also outputs the per-chain HMC tuning parameters step_size and metric.
+The sampler outputs are the set of per-chain
+`Stan CSV files <https://mc-stan.org/docs/cmdstan-guide/stan-csv.html>`_,
+a non-standard CSV file format.
+Each data row of the Stan CSV file contains the per-iteration estimate of the Stan model
+parameters, transformed parameters,  and generated quantities variables.
+Container variables, i.e., vector, row-vector, matrix, and array variables
+are necessarily serialized into a single row's worth of data.
+The output objects parse the set of Stan CSV files into a set of in-memory data structures
+and provide accessor functions for all the estimates and metadata.
+CmdStanPy makes a distinction between the per-iteration model outputs
+and the per-iteration algorithm outputs:  the former are 'stan_variables'
+and the latter are 'method_variables'.
 
-The ``CmdStanMCMC`` object parses the set of Stan CSV files into separate in-memory data structures for
-the set of sampler iterations, the metadata, and the step_size and metric and provides accessor methods for each.
-The primary object of interest are the draws from all iterations of the sampler, i.e., the CSV data rows.
-The ``CmdStanMCMC`` methods allow the user to extract the sample in whatever data format is needed for their analysis.
-The sample can be extracted in tabular format, either as
+The `CmdStanMCMC` object provides the following accessor methods:
 
-+ a numpy.ndarray: :meth:`~CmdStanMCMC.draws`
++ :meth:`~CmdStanMCMC.stan_variable`: returns a numpy.ndarray whose structure corresponds to the Stan program variable structure.
 
-+ a pandas.DataFrame: :meth:`~CmdStanMCMC.draws_pd`
++ :meth:`~CmdStanMCMC.stan_variables`: returns a Python dictionary mapping the Stan program variable names to the corresponding numpy.ndarray.
 
-.. ipython:: python
++ :meth:`~CmdStanMCMC.draws`: returns a numpy.ndarray which is either a 3-D array draws X chains X CSV columns,
+  or a 2-D array draws X columns, where the draws from all chains are concatenated along the first dimension.
+  The argument `vars` can be used to restrict this to just the columns for one or more variables.
 
-    print(fit.draws().shape)
-    print(fit.draws(concat_chains=True).shape)
-    fit.draws_pd()
++ :meth:`~CmdStanMCMC.draws_pd`: returns a pandas.DataFrame over all columns in the Stan CSV file.
+  The argument `vars` can be used to restrict this to one or more variables.
 
-The sample can be treated as a collection of named, structured variables.
-CmdStanPy makes a distinction between the per-iteration model outputs
-and the per-iteration algorithm outputs:  the former are 'stan_variables'
-and the information reported by the sampler are 'method_variables'.
-Accessor functions extract these as:
++ :meth:`~CmdStanMCMC.draws_xr`: returns an xarray.Dataset which maps model variable names to their respective values.
+  The argument `vars` can be used to restrict this to one or more variables.
 
-+ a structured numpy.ndarray: :meth:`~CmdStanMCMC.stan_variable`
-  which contains the set of all draws in the sample for the named Stan program variable.
-  The draws from all chains are flattened, i.e.,
-  the first ndarray dimension is the number of draws X number of chains.
-  The remaining ndarray dimensions correspond to the Stan program variable dimension.
-
-+ an xarray.Dataset: :meth:`~CmdStanMCMC.draws_xr`
-
-+ a Python dict mapping Stan variable names to numpy.ndarray objects, where the
-  chains are flattened, as above:
-  :meth:`~CmdStanMCMC.stan_variables`.
-
-+ a Python dict mapping the algorithm outputs to numpy.ndarray objects.
-  Because these outputs are used for within-chain and cross-chain diagnostics,
-  they are not flattened.
-  :meth:`~CmdStanMCMC.stan_variables`.
++ :meth:`~CmdStanMCMC.method_variables`: returns a Python dictionary over the sampler diagnostic/information output columns
+  which by convention end in ``__``, e.g., ``lp__``.
 
 
 .. ipython:: python
 
+    # access model variable by name
     print(fit.stan_variable('theta'))
+    print(fit.draws_pd('theta')[:3])
     print(fit.draws_xr('theta'))
+    # access all model variables
     for k, v in fit.stan_variables().items():
         print(f'{k}\t{v.shape}')
+    # access the sampler method variables
     for k, v in fit.method_variables().items():
         print(f'{k}\t{v.shape}')
+    # access all Stan CSV file columns
+    print(f'numpy.ndarray of draws: {fit.draws().shape}')
+    fit.draws_pd()
 
-
+    
 In addition to the MCMC sample itself, the CmdStanMCMC object provides
 access to the per-chain HMC tuning parameters from the NUTS-HMC adaptive sampler,
 (if present).
@@ -175,7 +174,6 @@ access to the the per-chain HMC tuning parameters from the NUTS-HMC adaptive sam
     print(fit.step_size)
 
 
-
 The CmdStanMCMC object also provides access to metadata about the model and the sampler run.
 
 .. ipython:: python
@@ -189,6 +187,7 @@ The CmdStanMCMC object also provides access to metadata about the model and the
 
 
 
+
 CmdStan utilities:  ``stansummary``, ``diagnose``
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/docsrc/users-guide/workflow.rst b/docsrc/users-guide/workflow.rst
@@ -193,26 +193,30 @@ the object's :attr:`~CmdStanMCMC.metadata` property.
 Output data
 -----------
 
-The CSV data is assembled into the inference result object.
-CmdStanPy provides accessor methods which return this information
+The resulting Stan CSV file or set of files is assembled into an inference result object.
+
++ :class:`CmdStanMCMC` object contains the :meth:`~CmdStanModel.sample` outputs
++ :class:`CmdStanVB` object contains the :meth:`~CmdStanModel.variational` outputs
++ :class:`CmdStanMLE` object contains the :meth:`~CmdStanModel.optimize` outputs
++ :class:`CmdStanGQ` object contains the :meth:`~CmdStanModel.generate_quantities` outputs
+
+
+The objects provide accessor methods which return this information
 either as columnar data (i.e., in terms of the CSV file columns),
 or as method and model variables.
 
-The :meth:`~CmdStanMCMC.draws` and :meth:`~CmdStanMCMC.draws_pd` methods
-for both :class:`CmdStanMCMC` and :class:`CmdStanGQ` return the sample contents
-in columnar format, as a numpy.ndarray or pandas.DataFrame, respectively. Similarly,
-the :meth:`~CmdStanMCMC.draws_xr` method  of these two objects returns the sample
-contents as an :py:class:`xarray.Dataset` which maps the method and model variable
-names to their respective values.
+The ``stan_variables`` method returns a Python dict over all Stan model variables,
+see :meth:`~CmdStanMCMC.stan_variables`.
+
+The ``stan_variable`` method returns a single model variable as a numpy.ndarray object
+with the same structure (per draw) as the Stan program variable,
+see :meth:`~CmdStanMCMC.stan_variable`.
 
-The :meth:`~CmdStanMCMC.method_variables` method returns a Python dict over all inference
-method variables.
+The ``method_variables`` method returns a Python dict over all inference
+method variables, see :meth:`~CmdStanMCMC.method_variables`.
 
-All inference objects expose the following methods:
 
-The :meth:`~CmdStanMCMC.stan_variable` method to returns a numpy.ndarray object
-which contains the set of all draws in the sample for the named Stan program variable.
-The draws from all chains are flattened into a single drawset.
-The first ndarray dimension is the number of draws X number of chains.
-The remaining ndarray dimensions correspond to the Stan program variable dimension.
-The :meth:`~CmdStanMCMC.stan_variables` method returns a Python dict over all Stan model variables.
+The :class:`CmdStanMCMC` and :class:`CmdStanGQ` objects return the sample contents
+in tabular form, see :meth:`~CmdStanMCMC.draws` and :meth:`~CmdStanMCMC.draws_pd`.
+Similarly, the :meth:`~CmdStanMCMC.draws_xr` method returns the sample
+contents as an :py:class:`xarray.Dataset` which is a mapping from variable names to their respective values.