Merge branch 'develop' of https://github.com/stan-dev/cmdstanpy into develop

mitzimorris · mitzimorris · commit c4da51a55e3d · 2022-08-10T17:55:02.000-04:00
diff --git a/cmdstanpy/stanfit/gq.py b/cmdstanpy/stanfit/gq.py
@@ -268,6 +268,7 @@ def draws_pd(
                 vars_list = [vars]
             else:
                 vars_list = vars
+            vars_list = list(dict.fromkeys(vars_list))
         if (
             inc_warmup
             and not self.mcmc_sample.metadata.cmdstan_config['save_warmup']
@@ -282,7 +283,7 @@ def draws_pd(
         gq_cols = []
         mcmc_vars = []
         if vars is not None:
-            for var in set(vars_list):
+            for var in vars_list:
                 if var in self.metadata.stan_vars_cols:
                     for idx in self.metadata.stan_vars_cols[var]:
                         gq_cols.append(self.column_names[idx])
@@ -295,6 +296,7 @@ def draws_pd(
                     raise ValueError('Unknown variable: {}'.format(var))
         else:
             gq_cols = list(self.column_names)
+            vars_list = gq_cols
 
         if inc_sample and mcmc_vars:
             if gq_cols:
@@ -311,7 +313,7 @@ def draws_pd(
                         )[gq_cols],
                     ],
                     axis='columns',
-                )
+                )[vars_list]
             else:
                 return self.mcmc_sample.draws_pd(
                     vars=mcmc_vars, inc_warmup=inc_warmup
diff --git a/cmdstanpy/stanfit/mcmc.py b/cmdstanpy/stanfit/mcmc.py
@@ -589,7 +589,7 @@ def draws_pd(
             self._assemble_draws()
         cols = []
         if vars is not None:
-            for var in set(vars_list):
+            for var in dict.fromkeys(vars_list):
                 if (
                     var not in self.metadata.method_vars_cols
                     and var not in self.metadata.stan_vars_cols
diff --git a/docsrc/community.rst b/docsrc/community.rst
@@ -13,6 +13,11 @@ when you start a new project.
   <https://github.com/teddygroves/cookiecutter-cmdstanpy-analysis>`_ A
   cookiecutter template for cmdstanpy-based statistical analysis projects.
 
+- `cookiecutter-cmdstanpy-wrapper
+  <https://github.com/WardBrian/cookiecutter-cmdstanpy-wrapper>`_ A
+  cookiecutter template for using Stan models in Python packages, including
+  the ability to pre-compile the model as part of the package distribution.
+
 Software
 --------
 
diff --git a/docsrc/users-guide.rst b/docsrc/users-guide.rst
@@ -10,4 +10,5 @@ usage of CmdStanPy.
     users-guide/overview
     users-guide/hello_world
     users-guide/workflow
+    users-guide/outputs
     users-guide/examples
diff --git a/docsrc/users-guide/hello_world.rst b/docsrc/users-guide/hello_world.rst
@@ -104,21 +104,6 @@ By default, the `sample` method runs 4 sampler chains.
     # fit the model
     fit = model.sample(data=data_file)
 
-Underlyingly, the CmdStan outputs are a set of per-chain
-`Stan CSV files <https://mc-stan.org/docs/cmdstan-guide/stan-csv.html#mcmc-sampler-csv-output>`__.
-The filenames follow the template '<model_name>-<YYYYMMDDHHMMSS>-<chain_id>'
-plus the file suffix '.csv'.
-CmdStanPy also captures the per-chain console and error messages.
-The ``output_dir`` argument is an optional argument which specifies
-the path to the output directory used by CmdStan.
-If this argument is omitted, the output files are written
-to a temporary directory which is deleted when the current Python session is terminated.
-
-.. ipython:: python
-
-    # printing the object reports sampler commands, output files
-    print(fit)
-
 
 Accessing the results
 ^^^^^^^^^^^^^^^^^^^^^
@@ -230,18 +215,3 @@ The :meth:`~CmdStanMCMC.diagnose` method runs this utility and prints the output
 .. ipython:: python
 
     print(fit.diagnose())
-
-
-
-Managing Stan CSV files
-^^^^^^^^^^^^^^^^^^^^^^^
-
-The :class:`CmdStanMCMC` object keeps track of all output files produced
-by the sampler run.
-The :meth:`~CmdStanMCMC.save_csvfiles` function moves the CSV files
-to a specified directory.
-
-.. ipython:: python
-    :verbatim:
-
-    fit.save_csvfiles(dir='some/path')
diff --git a/docsrc/users-guide/outputs.rst b/docsrc/users-guide/outputs.rst
@@ -0,0 +1,109 @@
+.. py:currentmodule:: cmdstanpy
+
+Controlling Outputs
+===================
+
+CSV File Outputs
+----------------
+
+Under the hood, the CmdStan outputs are a set of per-chain
+`Stan CSV files <https://mc-stan.org/docs/cmdstan-guide/stan-csv.html#mcmc-sampler-csv-output>`__.
+The filenames follow the template '<model_name>-<YYYYMMDDHHMMSS>-<chain_id>'
+plus the file suffix '.csv'.
+CmdStanPy also captures the per-chain console and error messages.
+
+.. ipython:: python
+
+    import os
+    from cmdstanpy import CmdStanModel
+    stan_file = os.path.join('users-guide', 'examples', 'bernoulli.stan')
+    model = CmdStanModel(stan_file=stan_file)
+
+    data_file = os.path.join('users-guide', 'examples', 'bernoulli.data.json')
+    fit = model.sample(data=data_file)
+
+    # printing the object reports sampler commands, output files
+    print(fit)
+
+The optional ``output_dir`` argument specifies
+the path to the output directory used by CmdStan.
+If this argument is omitted, the output files are written
+to a temporary directory, which is deleted when the current Python session terminates.
+
+.. ipython:: python
+
+    fit = model.sample(data=data_file, output_dir="./outputs/")
+
+    !ls outputs/
+
+Alternatively, the :meth:`~CmdStanMCMC.save_csvfiles` method moves the CSV files
+to a specified directory.
+
+.. ipython:: python
+
+    fit = model.sample(data=data_file)
+    fit.save_csvfiles(dir='some/path')
+
+    !ls some/path
+
+.. ipython:: python
+    :suppress:
+
+    !rm -rf outputs/ some/path/
+
+Logging
+-------
+
+You may notice CmdStanPy can produce a lot of output when it is running:
+
+.. ipython:: python
+
+    fit = model.sample(data=data_file, show_progress=False)
+
+This output is managed through Python's standard :mod:`logging` module. For example, it can be disabled entirely:
+
+.. ipython:: python
+
+    import logging
+    cmdstanpy_logger = logging.getLogger("cmdstanpy")
+    cmdstanpy_logger.disabled = True
+    # look, no output!
+    fit = model.sample(data=data_file, show_progress=False)
+
+Alternatively, you can remove the logging handler that CmdStanPy installs by default and install your own for more
+fine-grained control. For example, the following code sends all logs (including the ``DEBUG`` logs, which are hidden by default)
+to a file.
+
+DEBUG logging is useful primarily to developers or when trying to hunt down an issue.
+
+.. ipython:: python
+
+    cmdstanpy_logger.disabled = False
+    # remove all existing handlers
+    cmdstanpy_logger.handlers = []
+
+    cmdstanpy_logger.setLevel(logging.DEBUG)
+    handler = logging.FileHandler('all.log')
+    handler.setLevel(logging.DEBUG)
+    handler.setFormatter(
+        logging.Formatter(
+            '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+            "%H:%M:%S",
+        )
+    )
+    cmdstanpy_logger.addHandler(handler)
+
+Now, if we run the model and check the contents of the file, we will see all of the logged messages.
+
+.. ipython:: python
+
+    fit = model.sample(data=data_file, show_progress=False)
+
+    with open('all.log','r') as logs:
+        for line in logs.readlines():
+            print(line.strip())
+
+.. ipython:: python
+    :suppress:
+
+    !rm all.log
diff --git a/test/test_generate_quantities.py b/test/test_generate_quantities.py
@@ -84,6 +84,11 @@ def test_from_csv_files(self):
             + bern_gqs.draws_pd().shape[1],
         )
 
+        self.assertEqual(
+            list(bern_gqs.draws_pd(vars=['y_rep']).columns),
+            column_names,
+        )
+
     def test_from_csv_files_bad(self):
         # gq model
         stan = os.path.join(DATAFILES_PATH, 'bernoulli_ppc.stan')
diff --git a/test/test_sample.py b/test/test_sample.py
@@ -741,6 +741,15 @@ def test_validate_good_run(self):
         self.assertEqual(fit.draws_pd(vars=['theta', 'lp__']).shape, (400, 2))
         self.assertEqual(fit.draws_pd(vars='theta').shape, (400, 1))
 
+        self.assertEqual(
+            list(fit.draws_pd(vars=['theta', 'lp__']).columns),
+            ['theta', 'lp__']
+        )
+        self.assertEqual(
+            list(fit.draws_pd(vars=['lp__', 'theta']).columns),
+            ['lp__', 'theta']
+        )
+
         summary = fit.summary()
         self.assertIn('5%', list(summary.columns))
         self.assertIn('50%', list(summary.columns))