Skip to content

Commit ed0fd3e

Browse files
handle nan inf; normalize non geojson json (#22)
* dump json example succ
* locate inf nan
* export bad json
* round non geojson
* release

---------

Co-authored-by: TANG ZHIXIONG <zhixiong.tang@momenta.ai>
1 parent 7de0d1b commit ed0fd3e

File tree

8 files changed

+203
-27
lines changed

8 files changed

+203
-27
lines changed

docs/about/release-notes.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ To upgrade `pybind11-geobuf` to the latest version, use pip:
1010
pip install -U pybind11-geobuf
1111
```
1212

13+
## Version 0.1.5 (2023-06-02)
14+
15+
* Add `round_non_geojson` to `normalize_json`
16+
* Handle NaN, Inf in json, add `locate_nan_inf`
17+
1318
## Version 0.1.4 (2023-04-15)
1419

1520
* More options to normalize_json

pybind11_geobuf/__main__.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from typing import Tuple
2+
from typing import Optional, Tuple
33

44
from loguru import logger
55

@@ -94,8 +94,9 @@ def normalize_json(
9494
only_xy: bool = False,
9595
denoise_double_0: bool = True,
9696
strip_geometry_z_0: bool = True,
97-
round_geojson_non_geometry: int = 3,
98-
round_geojson_geometry: Tuple[int, int, int] = (8, 8, 3),
97+
round_non_geojson: Optional[int] = 3,
98+
round_geojson_non_geometry: Optional[int] = 3,
99+
round_geojson_geometry: Optional[Tuple[int, int, int]] = (8, 8, 3),
99100
):
100101
logger.info(
101102
f"normalize_json {input_path} ({__filesize(input_path):,} bytes)"
@@ -124,6 +125,7 @@ def normalize_json(
124125
sort_keys=sort_keys,
125126
denoise_double_0=denoise_double_0,
126127
strip_geometry_z_0=strip_geometry_z_0,
128+
round_non_geojson=round_non_geojson,
127129
round_geojson_non_geometry=round_geojson_non_geometry,
128130
round_geojson_geometry=round_geojson_geometry,
129131
), f"failed to normalize json to {output_path}"

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def build_extension(self, ext):
127127
# logic and declaration, and simpler if you include description/version in a file.
128128
setup(
129129
name="pybind11_geobuf",
130-
version="0.1.4",
130+
version="0.1.5",
131131
author="tzx",
132132
author_email="dvorak4tzx@gmail.com",
133133
url="https://geobuf-cpp.readthedocs.io",

src/geobuf/geobuf.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,15 +75,16 @@ bool dump_json(FILE *fp, const RapidjsonValue &json, bool indent,
7575
using namespace rapidjson;
7676
char writeBuffer[65536];
7777
FileWriteStream os(fp, writeBuffer, sizeof(writeBuffer));
78+
bool succ = false;
7879
if (indent) {
7980
PrettyWriter<FileWriteStream> writer(os);
80-
json.Accept(writer);
81+
succ = json.Accept(writer);
8182
} else {
8283
Writer<FileWriteStream> writer(os);
83-
json.Accept(writer);
84+
succ = json.Accept(writer);
8485
}
8586
fclose(fp);
86-
return true;
87+
return succ;
8788
}
8889

8990
bool dump_json(const std::string &path, const RapidjsonValue &json, bool indent,

src/geobuf/rapidjson_helpers.hpp

Lines changed: 68 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,37 @@ inline void sort_keys_inplace(RapidjsonValue &json)
6969
}
7070
}
7171

72+
inline std::optional<std::string> locate_nan_inf(const RapidjsonValue &json,
73+
const std::string &path = "")
74+
{
75+
if (json.IsObject()) {
76+
for (auto &kv : json.GetObject()) {
77+
auto p = locate_nan_inf(kv.value);
78+
if (p) {
79+
return path + "[\"" +
80+
std::string(kv.name.GetString(),
81+
kv.name.GetStringLength()) +
82+
"\"]" + *p;
83+
}
84+
}
85+
} else if (json.IsArray()) {
86+
int index = -1;
87+
for (auto &m : json.GetArray()) {
88+
++index;
89+
auto p = locate_nan_inf(m);
90+
if (p) {
91+
return path + "[" + std::to_string(index) + "]" + *p;
92+
}
93+
}
94+
} else if (json.IsDouble()) {
95+
double d = json.GetDouble();
96+
if (std::isnan(d) || std::isinf(d)) {
97+
return path;
98+
}
99+
}
100+
return {};
101+
}
102+
72103
inline void round_rapidjson(RapidjsonValue &json, double scale, int depth = 1,
73104
const std::vector<std::string> &skip_keys = {})
74105
{
@@ -97,6 +128,30 @@ inline void round_rapidjson(RapidjsonValue &json, double scale, int depth = 1,
97128
}
98129
}
99130

131+
/// Round every number in `json` with `round_rapidjson` (unlimited depth),
/// unless `json` is an object whose string member "type" names one of the
/// GeoJSON types listed below; in that case `json` is left untouched.
///
/// @param json   value to round in place.
/// @param scale  scale forwarded unchanged to `round_rapidjson`.
inline void round_non_geojson(RapidjsonValue &json, double scale)
{
    static constexpr const char *geojson_types[] = {
        "FeatureCollection", //
        "Feature",           //
        "Point",             //
        "MultiPoint",        //
        "LineString",        //
        "MultiLineString",   //
        "Polygon",           //
        "MultiPolygon",      //
        "GeometryCollection",
    };

    bool is_geojson = false;
    if (json.IsObject()) {
        const auto type_itr = json.FindMember("type");
        if (type_itr != json.MemberEnd() && type_itr->value.IsString()) {
            const std::string type_name(type_itr->value.GetString(),
                                        type_itr->value.GetStringLength());
            for (const char *known : geojson_types) {
                if (type_name == known) {
                    is_geojson = true;
                    break;
                }
            }
        }
    }

    if (!is_geojson) {
        round_rapidjson(json, scale, INT_MAX);
    }
}
154+
100155
inline void round_geojson_non_geometry(RapidjsonValue &json, double scale)
101156
{
102157
if (!json.IsObject()) {
@@ -284,8 +339,9 @@ normalize_json(RapidjsonValue &json, //
284339
bool sort_keys = true, //
285340
std::optional<int> round_geojson_non_geometry = 3, //
286341
const std::optional<std::array<int, 3>> &round_geojson_geometry =
287-
std::array<int, 3>{8, 8, 3}, //
288-
bool denoise_double_0 = true, //
342+
std::array<int, 3>{8, 8, 3}, //
343+
std::optional<int> round_non_geojson = 3, //
344+
bool denoise_double_0 = true, //
289345
bool strip_geometry_z_0 = true)
290346
{
291347
if (sort_keys) {
@@ -301,6 +357,10 @@ normalize_json(RapidjsonValue &json, //
301357
std::pow(10.0, precision[1]),
302358
std::pow(10.0, precision[2])});
303359
}
360+
if (round_non_geojson) {
361+
double scale = std::pow(10.0, *round_non_geojson);
362+
cubao::round_non_geojson(json, scale);
363+
}
304364
if (strip_geometry_z_0) {
305365
cubao::strip_geometry_z_0(json);
306366
}
@@ -341,24 +401,25 @@ inline bool dump_json(const std::string &path, const RapidjsonValue &json,
341401
}
342402
using namespace rapidjson;
343403
char writeBuffer[65536];
404+
bool succ = false;
344405
FileWriteStream os(fp, writeBuffer, sizeof(writeBuffer));
345406
if (indent) {
346407
PrettyWriter<FileWriteStream> writer(os);
347408
if (sort_keys) {
348-
cubao::sort_keys(json).Accept(writer);
409+
succ = cubao::sort_keys(json).Accept(writer);
349410
} else {
350-
json.Accept(writer);
411+
succ = json.Accept(writer);
351412
}
352413
} else {
353414
Writer<FileWriteStream> writer(os);
354415
if (sort_keys) {
355-
cubao::sort_keys(json).Accept(writer);
416+
succ = cubao::sort_keys(json).Accept(writer);
356417
} else {
357-
json.Accept(writer);
418+
succ = json.Accept(writer);
358419
}
359420
}
360421
fclose(fp);
361-
return true;
422+
return succ;
362423
}
363424

364425
inline RapidjsonValue loads(const std::string &json)
@@ -508,11 +569,6 @@ inline RapidjsonValue to_rapidjson(const mapbox::geojson::value &json)
508569
return to_rapidjson(json, allocator);
509570
}
510571

511-
// inline mapbox::geojson::value to_geojson_value(const RapidjsonValue &json)
512-
// {
513-
// return mapbox::geojson::convert<mapbox::geojson::value>(json);
514-
// }
515-
516572
inline bool is_subset_of(const RapidjsonValue &a, const RapidjsonValue &b)
517573
{
518574
if (a.IsArray()) {

src/main.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,36 +36,40 @@ PYBIND11_MODULE(_pybind11_geobuf, m)
3636
"normalize_json",
3737
[](const std::string &input, const std::string &output, bool indent,
3838
bool sort_keys, bool denoise_double_0, bool strip_geometry_z_0,
39+
std::optional<int> round_non_geojson,
3940
std::optional<int> round_geojson_non_geometry,
4041
const std::optional<std::array<int, 3>> &round_geojson_geometry) {
4142
auto json = mapbox::geobuf::load_json(input);
4243
cubao::normalize_json(json, //
4344
sort_keys, //
4445
round_geojson_non_geometry, //
4546
round_geojson_geometry, //
47+
round_non_geojson, //
4648
denoise_double_0, //
4749
strip_geometry_z_0);
4850
return mapbox::geobuf::dump_json(output, json, indent);
4951
},
50-
"input_path"_a, "output_path"_a, //
51-
py::kw_only(), //
52-
"indent"_a = true, //
53-
"sort_keys"_a = true, //
54-
"denoise_double_0"_a = true, //
55-
"strip_geometry_z_0"_a = true, //
56-
"round_geojson_non_geometry"_a = 3,
52+
"input_path"_a, "output_path"_a, //
53+
py::kw_only(), //
54+
"indent"_a = true, //
55+
"sort_keys"_a = true, //
56+
"denoise_double_0"_a = true, //
57+
"strip_geometry_z_0"_a = true, //
58+
"round_non_geojson"_a = 3, //
59+
"round_geojson_non_geometry"_a = 3, //
5760
"round_geojson_geometry"_a = std::array<int, 3>{8, 8, 3})
5861
.def(
5962
"normalize_json",
6063
[](RapidjsonValue &json, bool sort_keys, bool denoise_double_0,
61-
bool strip_geometry_z_0,
64+
bool strip_geometry_z_0, std::optional<int> round_non_geojson,
6265
std::optional<int> round_geojson_non_geometry,
6366
const std::optional<std::array<int, 3>> &round_geojson_geometry)
6467
-> RapidjsonValue & {
6568
cubao::normalize_json(json, //
6669
sort_keys, //
6770
round_geojson_non_geometry, //
6871
round_geojson_geometry, //
72+
round_non_geojson, //
6973
denoise_double_0, //
7074
strip_geometry_z_0);
7175
return json;
@@ -75,6 +79,7 @@ PYBIND11_MODULE(_pybind11_geobuf, m)
7579
"sort_keys"_a = true, //
7680
"denoise_double_0"_a = true, //
7781
"strip_geometry_z_0"_a = true, //
82+
"round_non_geojson"_a = 3, //
7883
"round_geojson_non_geometry"_a = 3,
7984
"round_geojson_geometry"_a = std::array<int, 3>{8, 8, 3},
8085
rvp::reference_internal);

src/pybind11_rapidjson.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,10 @@ void bind_rapidjson(py::module &m)
149149
sort_keys_inplace(self);
150150
return self;
151151
}, rvp::reference_internal)
152+
// locate_nan_inf
153+
.def("locate_nan_inf", [](const RapidjsonValue &self) -> std::optional<std::string> {
154+
return locate_nan_inf(self);
155+
})
152156
.def("round", [](RapidjsonValue &self, double precision, int depth, //
153157
const std::vector<std::string> &skip_keys) -> RapidjsonValue & {
154158
round_rapidjson(self, std::pow(10, precision), depth, skip_keys);

tests/test_geobuf.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
import base64
2+
import contextlib
23
import json
34
import os
45
import pickle
6+
import shutil
57
import sys
8+
import tempfile
69
from copy import deepcopy
710

811
import numpy as np
@@ -1652,6 +1655,106 @@ def pytest_main(dir: str, *, test_file: str = None):
16521655
)
16531656

16541657

1658+
@contextlib.contextmanager
def context_tempdir():
    """Yield the path of a fresh temporary directory, removed on exit.

    Creation and removal are delegated to ``tempfile.TemporaryDirectory``
    instead of a hand-rolled ``mkdtemp``/``rmtree`` pair, so the directory
    and everything written into it are deleted when the ``with`` block
    ends, whether it finishes normally or raises.

    Yields:
        str: path of the temporary directory.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        yield temp_dir
1666+
1667+
1668+
def test_rapidjson_dump_nan():
    """NaN/Inf values are located by path and make ``dump`` report failure."""
    doc = rapidjson({"key": "value", "pi": 3.14, "nan": np.nan})
    # The serialized text is expected to stop right where the NaN would go.
    assert doc.dumps() == '{"key":"value","pi":3.14,"nan":'
    assert doc.locate_nan_inf() == '["nan"]'

    del doc["nan"]
    assert doc.locate_nan_inf() is None

    doc["inf"] = np.inf
    assert doc.locate_nan_inf() == '["inf"]'

    # A bare non-finite scalar is located at the empty path.
    for non_finite in (np.inf, np.nan):
        assert rapidjson(non_finite).locate_nan_inf() == ""
    assert rapidjson("json").locate_nan_inf() is None

    # The reported path accumulates one segment per nesting level.
    nested = rapidjson({"root": {}})
    nested["root"]["child"] = doc
    assert nested.locate_nan_inf() == '["root"]["child"]["inf"]'

    # A finite document dumps successfully and round-trips as text.
    with context_tempdir() as workdir:
        finite = rapidjson({"root": {}})
        okay_path = f"{workdir}/okay.json"
        assert finite.dump(okay_path)
        with open(okay_path) as fp:
            content = fp.read()
        assert content == '{"root":{}}'

    # A document containing Inf must report failure when dumped.
    with context_tempdir() as workdir:
        fail_path = f"{workdir}/fail.json"
        assert not nested.dump(fail_path)
1702+
1703+
1704+
def test_rapidjson_normalize_non_geojson():
    """Check how ``normalize_json`` rounds plain JSON versus a GeoJSON Point."""

    def sorted_point():
        # Fresh Point (plus one extra non-geometry member) with sorted keys.
        return rapidjson(
            {
                "type": "Point",
                "coordinates": [1.23456, 7.890123, 4.567],
                "value": 3.123456,
            }
        ).sort_keys()

    # Plain JSON is rounded with the default round_non_geojson precision.
    rounded = normalize_json(rapidjson({"value": 3.14156}))
    assert rounded.dumps() == '{"value":3.142}'

    # Passing None turns that rounding off.
    untouched = normalize_json(
        rapidjson({"value": 3.14156}),
        round_non_geojson=None,
    )
    assert untouched.dumps() == '{"value":3.14156}'

    # Per-axis geometry precision; "value" keeps the default rounding.
    result = normalize_json(sorted_point(), round_geojson_geometry=[3, 2, 1])
    expected = '{"coordinates":[1.235,7.89,4.6],"type":"Point","value":3.123}'  # noqa
    assert result.dumps() == expected

    # Geometry rounding disabled: coordinates are kept verbatim.
    result = normalize_json(sorted_point(), round_geojson_geometry=None)
    expected = '{"coordinates":[1.23456,7.890123,4.567],"type":"Point","value":3.123}'  # noqa
    assert result.dumps() == expected

    # Non-geometry rounding disabled: "value" is kept verbatim.
    result = normalize_json(
        sorted_point(),
        round_geojson_geometry=[1, 1, 1],
        round_geojson_non_geometry=None,
    )
    expected = '{"coordinates":[1.2,7.9,4.6],"type":"Point","value":3.123456}'  # noqa
    assert result.dumps() == expected
1756+
1757+
16551758
if __name__ == "__main__":
16561759
np.set_printoptions(suppress=True)
16571760
pwd = os.path.abspath(os.path.dirname(__file__))

0 commit comments

Comments
 (0)