Skip to content

Commit 92f5ab3

Browse files
committed
Fix json encoding of NaN/infinity, remove ujson dependency
1 parent c2bab85 commit 92f5ab3

File tree

12 files changed

+98
-98
lines changed

12 files changed

+98
-98
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ repos:
2525
- id: mypy
2626
# Copied from setup.cfg
2727
exclude: ^test/
28-
additional_dependencies: [ numpy >= 1.22, types-ujson ]
28+
additional_dependencies: [ numpy >= 1.22]
2929
# local uses the user-installed pylint, this allows dependency checking
3030
- repo: local
3131
hooks:

.pylintrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# A comma-separated list of package or module names from where C extensions may
44
# be loaded. Extensions are loading into the active Python interpreter and may
55
# run arbitrary code.
6-
extension-pkg-whitelist=ujson
6+
extension-pkg-whitelist=
77

88
# Add files or directories to the blacklist. They should be base names, not
99
# paths.

cmdstanpy/model.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,22 @@
11
"""CmdStanModel"""
22

33
import io
4+
import json
45
import os
56
import platform
67
import re
78
import shutil
89
import subprocess
910
import sys
11+
import threading
1012
from collections import OrderedDict
1113
from concurrent.futures import ThreadPoolExecutor
1214
from datetime import datetime
1315
from io import StringIO
1416
from multiprocessing import cpu_count
1517
from pathlib import Path
16-
import threading
1718
from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Union
1819

19-
import ujson as json
2020
from tqdm.auto import tqdm
2121

2222
from cmdstanpy import _CMDSTAN_REFRESH, _CMDSTAN_SAMPLING, _CMDSTAN_WARMUP
@@ -1587,6 +1587,7 @@ def _run_cmdstan(
15871587
env=os.environ,
15881588
universal_newlines=True,
15891589
)
1590+
timer: Optional[threading.Timer]
15901591
if timeout:
15911592

15921593
def _timer_target() -> None:

cmdstanpy/utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def show_versions(output: bool = True) -> str:
8585
except Exception:
8686
deps_info.append(('cmdstan', 'NOT FOUND'))
8787

88-
deps = ['cmdstanpy', 'pandas', 'xarray', 'tqdm', 'numpy', 'ujson']
88+
deps = ['cmdstanpy', 'pandas', 'xarray', 'tqdm', 'numpy']
8989
for module in deps:
9090
try:
9191
if module in sys.modules:

cmdstanpy/utils/json.py

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,10 @@
22
Utilities for writing Stan Json files
33
"""
44
import json
5-
import math
65
from collections.abc import Collection
7-
from typing import Any, List, Mapping, Union
6+
from typing import Any, List, Mapping
87

98
import numpy as np
10-
import ujson
11-
12-
from .logging import get_logger
13-
14-
15-
def rewrite_inf_nan(
16-
data: Union[float, int, List[Any]]
17-
) -> Union[str, int, float, List[Any]]:
18-
"""Replaces NaN and Infinity with string representations"""
19-
if isinstance(data, float):
20-
if math.isnan(data):
21-
return 'NaN'
22-
if math.isinf(data):
23-
return ('+' if data > 0 else '-') + 'inf'
24-
return data
25-
elif isinstance(data, list):
26-
return [rewrite_inf_nan(item) for item in data]
27-
else:
28-
return data
299

3010

3111
def serialize_complex(c: Any) -> List[float]:
@@ -56,7 +36,6 @@ def write_stan_json(path: str, data: Mapping[str, Any]) -> None:
5636
"""
5737
data_out = {}
5838
for key, val in data.items():
59-
handle_nan_inf = False
6039
if val is not None:
6140
if isinstance(val, (str, bytes)) or (
6241
type(val).__module__ != 'numpy'
@@ -67,9 +46,9 @@ def write_stan_json(path: str, data: Mapping[str, Any]) -> None:
6746
+ f"write_stan_json for key '{key}'"
6847
)
6948
try:
70-
handle_nan_inf = not np.all(np.isfinite(val))
71-
except TypeError:
7249
# handles cases like val == ['hello']
50+
np.isfinite(val)
51+
except TypeError:
7352
# pylint: disable=raise-missing-from
7453
raise ValueError(
7554
"Invalid type provided to "
@@ -86,12 +65,5 @@ def write_stan_json(path: str, data: Mapping[str, Any]) -> None:
8665
else:
8766
data_out[key] = val
8867

89-
if handle_nan_inf:
90-
data_out[key] = rewrite_inf_nan(data_out[key])
91-
9268
with open(path, 'w') as fd:
93-
try:
94-
ujson.dump(data_out, fd)
95-
except TypeError as e:
96-
get_logger().debug(e)
97-
json.dump(data_out, fd, default=serialize_complex)
69+
json.dump(data_out, fd, default=serialize_complex)

cmdstanpy/utils/stancsv.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Utility functions for reading the Stan CSV format
33
"""
4+
import json
45
import math
56
import re
67
from enum import Enum, auto
@@ -17,7 +18,6 @@
1718

1819
import numpy as np
1920
import pandas as pd
20-
import ujson
2121

2222
from cmdstanpy import _CMDSTAN_SAMPLING, _CMDSTAN_THIN, _CMDSTAN_WARMUP
2323

@@ -453,7 +453,7 @@ def read_metric(path: str) -> List[int]:
453453
"""
454454
if path.endswith('.json'):
455455
with open(path, 'r') as fd:
456-
metric_dict = ujson.load(fd)
456+
metric_dict = json.load(fd)
457457
if 'inv_metric' in metric_dict:
458458
dims_np: np.ndarray = np.asarray(metric_dict['inv_metric'])
459459
return list(dims_np.shape)

docsrc/users-guide/examples/Run Generated Quantities.ipynb

Lines changed: 53 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "markdown",
5+
"metadata": {},
56
"source": [
67
"# Generating new quantities of interest.\n",
78
"\n",
@@ -19,11 +20,11 @@
1920
"- transform parameters for reporting\n",
2021
"- apply full Bayesian decision theory\n",
2122
"- calculate log likelihoods, deviances, etc. for model comparison"
22-
],
23-
"metadata": {}
23+
]
2424
},
2525
{
2626
"cell_type": "markdown",
27+
"metadata": {},
2728
"source": [
2829
"## Example: add posterior predictive checks to `bernoulli.stan`\n",
2930
"\n",
@@ -34,12 +35,13 @@
3435
"We instantiate the model `bernoulli`,\n",
3536
"as in the \"Hello World\" section\n",
3637
"of the CmdStanPy [tutorial](https://github.com/stan-dev/cmdstanpy/blob/develop/cmdstanpy_tutorial.ipynb) notebook."
37-
],
38-
"metadata": {}
38+
]
3939
},
4040
{
4141
"cell_type": "code",
4242
"execution_count": null,
43+
"metadata": {},
44+
"outputs": [],
4345
"source": [
4446
"import os\n",
4547
"from cmdstanpy import cmdstan_path, CmdStanModel, CmdStanMCMC, CmdStanGQ\n",
@@ -51,153 +53,151 @@
5153
"# instantiate, compile bernoulli model\n",
5254
"model = CmdStanModel(stan_file=stan_file)\n",
5355
"print(model.code())"
54-
],
55-
"outputs": [],
56-
"metadata": {}
56+
]
5757
},
5858
{
5959
"cell_type": "markdown",
60+
"metadata": {},
6061
"source": [
6162
"The input data consists of `N` - the number of bernoulli trials and `y` - the list of observed outcomes.\n",
6263
"Inspection of the data shows that on average, there is a 20% chance of success for any given Bernoulli trial."
63-
],
64-
"metadata": {}
64+
]
6565
},
6666
{
6767
"cell_type": "code",
6868
"execution_count": null,
69+
"metadata": {},
70+
"outputs": [],
6971
"source": [
7072
"# examine bernoulli data\n",
71-
"import ujson\n",
73+
"import json\n",
7274
"import statistics\n",
7375
"with open(data_file,'r') as fp:\n",
74-
" data_dict = ujson.load(fp)\n",
76+
" data_dict = json.load(fp)\n",
7577
"print(data_dict)\n",
7678
"print('mean of y: {}'.format(statistics.mean(data_dict['y'])))"
77-
],
78-
"outputs": [],
79-
"metadata": {}
79+
]
8080
},
8181
{
8282
"cell_type": "markdown",
83+
"metadata": {},
8384
"source": [
8485
"As in the \"Hello World\" tutorial, we produce a sample from the posterior of the model conditioned on the data:"
85-
],
86-
"metadata": {}
86+
]
8787
},
8888
{
8989
"cell_type": "code",
9090
"execution_count": null,
91+
"metadata": {},
92+
"outputs": [],
9193
"source": [
9294
"# fit the model to the data\n",
9395
"fit = model.sample(data=data_file)"
94-
],
95-
"outputs": [],
96-
"metadata": {}
96+
]
9797
},
9898
{
9999
"cell_type": "markdown",
100+
"metadata": {},
100101
"source": [
101102
"The fitted model produces an estimate of `theta` - the chance of success"
102-
],
103-
"metadata": {}
103+
]
104104
},
105105
{
106106
"cell_type": "code",
107107
"execution_count": null,
108+
"metadata": {},
109+
"outputs": [],
108110
"source": [
109111
"fit.summary()"
110-
],
111-
"outputs": [],
112-
"metadata": {}
112+
]
113113
},
114114
{
115115
"cell_type": "markdown",
116+
"metadata": {},
116117
"source": [
117118
"To run a posterior predictive check, we add a `generated quantities` block to the model, in which we generate a new data vector `y_rep` using the current estimate of theta. The resulting model is in file [bernoulli_ppc.stan](https://github.com/stan-dev/cmdstanpy/blob/master/test/data/bernoulli_ppc.stan)"
118-
],
119-
"metadata": {}
119+
]
120120
},
121121
{
122122
"cell_type": "code",
123123
"execution_count": null,
124+
"metadata": {},
125+
"outputs": [],
124126
"source": [
125127
"model_ppc = CmdStanModel(stan_file='bernoulli_ppc.stan')\n",
126128
"print(model_ppc.code())"
127-
],
128-
"outputs": [],
129-
"metadata": {}
129+
]
130130
},
131131
{
132132
"cell_type": "markdown",
133+
"metadata": {},
133134
"source": [
134135
"We run the `generate_quantities` method on `bernoulli_ppc` using existing sample `fit` as input. The `generate_quantities` method takes the values of `theta` in the `fit` sample as the set of draws from the posterior used to generate the corresponding `y_rep` quantities of interest.\n",
135136
"\n",
136137
"The arguments to the `generate_quantities` method are:\n",
137138
" + `data` - the data used to fit the model\n",
138139
" + `mcmc_sample` - either a `CmdStanMCMC` object or a list of stan-csv files\n"
139-
],
140-
"metadata": {}
140+
]
141141
},
142142
{
143143
"cell_type": "code",
144144
"execution_count": null,
145+
"metadata": {},
146+
"outputs": [],
145147
"source": [
146148
"new_quantities = model_ppc.generate_quantities(data=data_file, mcmc_sample=fit)"
147-
],
148-
"outputs": [],
149-
"metadata": {}
149+
]
150150
},
151151
{
152152
"cell_type": "markdown",
153+
"metadata": {},
153154
"source": [
154155
"The `generate_quantities` method returns a `CmdStanGQ` object which contains the values for all variables in the generated quantities block of the program ``bernoulli_ppc.stan``. Unlike the output from the ``sample`` method, it doesn't contain any information on the joint log probability density, sampler state, or parameters or transformed parameter values.\n",
155156
"\n",
156157
"In this example, each draw consists of an N-length array of replicates of the `bernoulli` model's input variable `y`, which is an N-length array of Bernoulli outcomes."
157-
],
158-
"metadata": {}
158+
]
159159
},
160160
{
161161
"cell_type": "code",
162162
"execution_count": null,
163+
"metadata": {},
164+
"outputs": [],
163165
"source": [
164166
"print(new_quantities.draws().shape, new_quantities.column_names)\n",
165167
"for i in range(3):\n",
166168
" print (new_quantities.draws()[i,:])"
167-
],
168-
"outputs": [],
169-
"metadata": {}
169+
]
170170
},
171171
{
172172
"cell_type": "markdown",
173+
"metadata": {},
173174
"source": [
174175
"We can also use ``draws_pd(inc_sample=True)`` to get a pandas DataFrame which combines the input drawset with the generated quantities."
175-
],
176-
"metadata": {}
176+
]
177177
},
178178
{
179179
"cell_type": "code",
180180
"execution_count": null,
181+
"metadata": {},
182+
"outputs": [],
181183
"source": [
182184
"sample_plus = new_quantities.draws_pd(inc_sample=True)\n",
183185
"print(type(sample_plus),sample_plus.shape)\n",
184186
"names = list(sample_plus.columns.values[7:18])\n",
185187
"sample_plus.iloc[0:3, :]"
186-
],
187-
"outputs": [],
188-
"metadata": {}
188+
]
189189
},
190190
{
191191
"cell_type": "markdown",
192+
"metadata": {},
192193
"source": [
193194
"For models as simple as the bernoulli models here, it would be trivial to re-run the sampler and generate a new sample which contains both the estimate of the parameters `theta` as well as `y_rep` values. For models which are difficult to fit, i.e., when producing a sample is computationally expensive, the `generate_quantities` method is preferred."
194-
],
195-
"metadata": {}
195+
]
196196
}
197197
],
198198
"metadata": {
199199
"kernelspec": {
200-
"display_name": "Python 3",
200+
"display_name": "Python 3.9.5 ('stan')",
201201
"language": "python",
202202
"name": "python3"
203203
},
@@ -212,8 +212,13 @@
212212
"nbconvert_exporter": "python",
213213
"pygments_lexer": "ipython3",
214214
"version": "3.9.5"
215+
},
216+
"vscode": {
217+
"interpreter": {
218+
"hash": "8765ce46b013071999fc1966b52035a7309a0da7551e066cc0f0fa23e83d4f60"
219+
}
215220
}
216221
},
217222
"nbformat": 4,
218223
"nbformat_minor": 4
219-
}
224+
}

0 commit comments

Comments
 (0)