Skip to content

Commit 4c9f5c5

Browse files
authored
fix geobuf decoding (#12)
* add sample2.json * seems pybind encoder is buggy * use try-emplace * export id of feature * fix should clear values!!!!
1 parent 3a5ce27 commit 4c9f5c5

File tree

6 files changed

+172
-46
lines changed

6 files changed

+172
-46
lines changed

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ cli_test:
114114
python3 -m pybind11_geobuf normalize_json data/sample1.json build/sample1.normalized.json
115115
python3 -m pybind11_geobuf normalize_json data/sample1.json build/sample1.normalized.precision2.json --precision=2
116116

117+
cli_test2:
118+
python3 -m pybind11_geobuf round_trip data/sample2.json -o build/test/pybind --json2pb_use_python=False --pb2json_use_python=False
119+
python3 -m pybind11_geobuf round_trip data/sample2.json -o build/test/python --json2pb_use_python=True --pb2json_use_python=True
120+
python3 -m pybind11_geobuf round_trip data/sample2.json -o build/test/cxx_py --json2pb_use_python=False --pb2json_use_python=True
121+
python3 -m pybind11_geobuf round_trip data/sample2.json -o build/test/py_cxx --json2pb_use_python=True --pb2json_use_python=False
122+
117123
# conda create -y -n py36 python=3.6
118124
# conda create -y -n py37 python=3.7
119125
# conda create -y -n py38 python=3.8

data/sample2.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"type": "Feature",
3+
"properties": {
4+
"string": "string",
5+
"int": 42,
6+
"int2": -101,
7+
"double": 3.141592653,
8+
"list": ["a", "list", "is", "a", "list"],
9+
"dict": {"key": 42, "value": 3.14}
10+
},
11+
"geometry": {
12+
"coordinates": [
13+
[120.40317479950272, 31.416966084052177, 1.111111],
14+
[120.28451900911591, 31.30578266928819, 2.22],
15+
[120.35592249359615, 31.21781895672254, 3.3333333333333],
16+
[120.67093786630113, 31.299502266522722, 4.4]
17+
],
18+
"type": "LineString",
19+
"extra_key": "extra_value"
20+
},
21+
"my_key": "my_value"
22+
}

pybind11_geobuf/__main__.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,71 @@ def pbf_decode(path: str, output_path: str = None, *, indent: str = ""):
132132
print(decoded)
133133

134134

135+
def system(cmd: str):
136+
print(f"$ {cmd}")
137+
assert 0 == os.system(cmd), f"failed at {cmd}"
138+
139+
140+
def remove(path):
141+
if not os.path.isfile(path):
142+
return
143+
os.remove(path)
144+
145+
146+
@logger.catch(reraise=True)
147+
def round_trip(
148+
path: str,
149+
output_dir: str = None,
150+
*,
151+
precision: int = 6,
152+
json2pb_use_python: bool = False,
153+
pb2json_use_python: bool = False,
154+
):
155+
"""
156+
_0.json
157+
_1.pbf
158+
_2.pbf.txt
159+
_3.json
160+
"""
161+
assert path.endswith((".json", ".geojson")) and os.path.isfile(path)
162+
path = os.path.abspath(path)
163+
output_dir = os.path.abspath(output_dir or os.path.dirname(path))
164+
os.makedirs(output_dir, exist_ok=True)
165+
filename = os.path.basename(path)
166+
167+
data = rapidjson().load(path).sort_keys()
168+
opath = f"{output_dir}/{filename}_0.json"
169+
remove(opath)
170+
assert data.dump(opath, indent=True)
171+
logger.info(f"wrote to {opath}")
172+
173+
ipath = opath
174+
opath = f"{output_dir}/{filename}_1.pbf"
175+
remove(opath)
176+
if json2pb_use_python:
177+
cmd = f"geobuf encode --precision={precision} --with-z < {ipath} > {opath}" # noqa
178+
system(cmd)
179+
else:
180+
json2geobuf(ipath, opath, precision=precision)
181+
logger.info(f"wrote to {opath}")
182+
183+
ipath = opath
184+
opath = f"{ipath}.txt"
185+
remove(opath)
186+
pbf_decode(ipath, opath)
187+
logger.info(f"wrote to {opath}")
188+
189+
opath = f"{output_dir}/{filename}_3.json"
190+
remove(opath)
191+
if pb2json_use_python:
192+
cmd = f"geobuf decode < {ipath} > {opath}"
193+
system(cmd)
194+
normalize_json(opath, opath)
195+
else:
196+
geobuf2json(ipath, opath, indent=True, sort_keys=True)
197+
logger.info(f"wrote to {opath}")
198+
199+
135200
if __name__ == "__main__":
136201
import fire
137202

@@ -143,5 +208,6 @@ def pbf_decode(path: str, output_path: str = None, *, indent: str = ""):
143208
"normalize_geobuf": normalize_geobuf,
144209
"normalize_json": normalize_json,
145210
"pbf_decode": pbf_decode,
211+
"round_trip": round_trip,
146212
}
147213
)

src/geobuf/geobuf.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
#include <protozero/pbf_builder.hpp>
1818
#include <protozero/pbf_reader.hpp>
1919

20+
// https://github.com/mapbox/geobuf/blob/master/encode.js
21+
// https://github.com/mapbox/geobuf/blob/master/decode.js
22+
2023
#ifdef NDEBUG
2124
#define dbg(x) x
2225
#else
@@ -251,6 +254,7 @@ std::string Encoder::encode(const mapbox::geojson::geojson &geojson)
251254
protozero::pbf_writer pbf_g{pbf, 6};
252255
writeGeometry(geometry, pbf_g);
253256
});
257+
// keys.clear(); // we leave it there for debugging purpose
254258
return data;
255259
}
256260

@@ -357,10 +361,7 @@ void Encoder::analyzePoint(const mapbox::geojson::point &point)
357361
}
358362
void Encoder::saveKey(const std::string &key)
359363
{
360-
if (keys.find(key) != keys.end()) {
361-
return;
362-
}
363-
keys[key] = keys.size();
364+
keys.try_emplace(key, keys.size());
364365
}
365366

366367
void Encoder::saveKey(const mapbox::feature::property_map &props)
@@ -389,6 +390,11 @@ void Encoder::writeFeature(const mapbox::geojson::feature &feature, Pbf &pbf)
389390
writeGeometry(feature.geometry, pbf_geom);
390391
}
391392
if (!feature.id.is<mapbox::geojson::null_value_t>()) {
393+
// https://github.com/mapbox/geobuf/blob/daad5e039f842f4d4f24ed7d59f31586563b71b8/geobuf.proto#L18-L21
394+
// oneof id_type {
395+
// string id = 11;
396+
// sint64 int_id = 12;
397+
// }
392398
feature.id.match([&](int64_t id) { pbf.add_int64(12, id); },
393399
[&](const std::string &id) { pbf.add_string(11, id); },
394400
[&](const auto &) {
@@ -460,6 +466,16 @@ void Encoder::writeProps(const mapbox::feature::property_map &props,
460466

461467
void Encoder::writeValue(const mapbox::feature::value &value, Encoder::Pbf &pbf)
462468
{
469+
// message Value {
470+
// oneof value_type {
471+
// string string_value = 1;
472+
// double double_value = 2;
473+
// uint64 pos_int_value = 3;
474+
// uint64 neg_int_value = 4;
475+
// bool bool_value = 5;
476+
// string json_value = 6;
477+
// }
478+
// }
463479
value.match([&](bool val) { pbf.add_bool(5, val); },
464480
[&](uint64_t val) { pbf.add_uint64(3, val); },
465481
[&](int64_t val) { pbf.add_uint64(4, -val); },
@@ -630,6 +646,7 @@ mapbox::geojson::feature_collection Decoder::readFeatureCollection(Pbf &pbf)
630646
fc.custom_properties, //
631647
std::vector<uint32_t>(indexes.begin(), indexes.end()), //
632648
keys, values);
649+
values.clear();
633650
} else {
634651
pbf.skip();
635652
}
@@ -646,7 +663,7 @@ mapbox::geojson::feature Decoder::readFeature(Pbf &pbf)
646663
protozero::pbf_reader pbf_g = pbf.get_message();
647664
f.geometry = readGeometry(pbf_g);
648665
} else if (tag == 11) {
649-
f.id = pbf.get_string();
666+
f.id = pbf.get_string(); // TODO, restore to mapbox::geojson id
650667
} else if (tag == 12) {
651668
f.id = pbf.get_int64();
652669
} else if (tag == 13) {
@@ -661,6 +678,7 @@ mapbox::geojson::feature Decoder::readFeature(Pbf &pbf)
661678
f.properties, //
662679
std::vector<uint32_t>(indexes.begin(), indexes.end()), //
663680
keys, values);
681+
values.clear();
664682
} else if (tag == 15) {
665683
auto indexes = pbf.get_packed_uint32();
666684
if (indexes.size() % 2 != 0) {
@@ -670,6 +688,7 @@ mapbox::geojson::feature Decoder::readFeature(Pbf &pbf)
670688
f.custom_properties, //
671689
std::vector<uint32_t>(indexes.begin(), indexes.end()), //
672690
keys, values);
691+
values.clear();
673692
} else {
674693
pbf.skip();
675694
}
@@ -850,6 +869,7 @@ mapbox::geojson::geometry Decoder::readGeometry(Pbf &pbf)
850869
g.custom_properties, //
851870
std::vector<uint32_t>(indexes.begin(), indexes.end()), //
852871
keys, values);
872+
values.clear();
853873
} else {
854874
pbf.skip();
855875
}

src/geobuf/pybind11_helpers.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,14 +338,18 @@ inline py::object to_python(const mapbox::geojson::feature &f)
338338
py::dict ret;
339339
ret["type"] = "Feature";
340340
ret["geometry"] = to_python(f.geometry);
341+
if (!f.id.is<mapbox::feature::null_value_t>()) {
342+
ret["id"] = to_python(f.id);
343+
}
341344
ret["properties"] = to_python(f.properties);
342345
if (!f.custom_properties.empty()) {
343346
for (auto &p : f.custom_properties) {
344347
const auto &k = p.first;
345-
if (k == "type" || k == "geometry" || k == "properties") {
348+
if (k == "type" || k == "id" || k == "geometry" ||
349+
k == "properties") {
346350
continue;
347351
}
348-
ret[py::str(p.first)] = to_python(p.second);
352+
ret[py::str(k)] = to_python(p.second);
349353
}
350354
}
351355
return ret;

tests/test_geobuf.py

Lines changed: 47 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ def sample_geojson():
3030
"properties": {
3131
"string": "string",
3232
"int": 42,
33-
# "int2": -101,
33+
"int2": -101,
3434
"double": 3.141592653,
3535
"list": ["a", "list", "is", "a", "list"],
36-
# "dict": {"key": 42, "value": 3.14},
36+
"dict": {"key": 42, "value": 3.14},
3737
},
3838
"geometry": {
3939
"coordinates": [
@@ -97,6 +97,12 @@ def test_rapidjson_empty():
9797
assert rapidjson([4, 2]).clear() != rapidjson({})
9898
assert rapidjson({"key": "value"}).clear() == rapidjson({})
9999

100+
obj1 = rapidjson({"k1": 1, "k2": 2})
101+
obj2 = rapidjson({"k2": 2, "k1": 1})
102+
assert obj1 == obj2
103+
assert list(obj1.keys()) == ["k1", "k2"]
104+
assert list(obj2.keys()) == ["k2", "k1"]
105+
100106

101107
def test_rapidjson_arr():
102108
arr = rapidjson([1, 3, "text", {"key": 3.2}])
@@ -924,8 +930,10 @@ def test_geobuf_from_geojson():
924930
decoded_again = Decoder().decode(
925931
Encoder(max_precision=int(10**8)).encode(decoded)
926932
)
927-
assert decoded_again == decoded
928-
assert decoded_again == Decoder().decode(encoded)
933+
assert rapidjson().loads(decoded_again) == rapidjson().loads(decoded)
934+
assert rapidjson().loads(decoded_again) == rapidjson().loads(
935+
Decoder().decode(encoded)
936+
)
929937

930938
j = Decoder().decode_to_rapidjson(encoded)
931939
g = Decoder().decode_to_geojson(encoded)
@@ -1008,19 +1016,19 @@ def test_geojson_feature():
10081016
props = feature.properties()
10091017
assert not isinstance(props, dict)
10101018
assert isinstance(props, geojson.value.object_type)
1011-
# assert (
1012-
# props.to_rapidjson().sort_keys().dumps()
1013-
# == '{"dict":{"key":42,"value":3.14},"double":3.141592653,"int":42,"int2":-101,"list":["a","list","is","a","list"],"string":"string"}' # noqa
1014-
# )
1015-
1016-
# assert set(props.keys()) == {
1017-
# # "dict",
1018-
# "double",
1019-
# "int",
1020-
# # "int2",
1021-
# "list",
1022-
# "string",
1023-
# }
1019+
assert (
1020+
props.to_rapidjson().sort_keys().dumps()
1021+
== '{"dict":{"key":42,"value":3.14},"double":3.141592653,"int":42,"int2":-101,"list":["a","list","is","a","list"],"string":"string"}' # noqa
1022+
)
1023+
1024+
assert set(props.keys()) == {
1025+
"dict",
1026+
"double",
1027+
"int",
1028+
"int2",
1029+
"list",
1030+
"string",
1031+
}
10241032
keys = list(props.keys())
10251033
values = list(props.values())
10261034
for i, (k, v) in enumerate(props.items()):
@@ -1039,21 +1047,21 @@ def test_geojson_feature():
10391047
assert props["list"]() == ["a", "list", "is", "a", "list"]
10401048
assert props["list"].as_array()() == ["a", "list", "is", "a", "list"]
10411049

1042-
# assert props["dict"].is_object()
1043-
# for k, v in props["dict"].as_object().items():
1044-
# assert isinstance(k, str)
1045-
# assert isinstance(v, geojson.value)
1046-
# assert type(x) == geojson.value
1047-
# with pytest.raises(RuntimeError) as excinfo:
1048-
# props["dict"].as_array()
1049-
# assert "in get<T>()" in repr(excinfo)
1050-
# assert props["dict"]() == {"key": 42, "value": 3.14}
1051-
# assert props["dict"].as_object()() == {"key": 42, "value": 3.14}
1052-
# assert list(props["dict"].keys()) in [
1053-
# # order no guarantee (rapidjson has order, value(unordered_map) not)
1054-
# ["key", "value"],
1055-
# ["value", "key"],
1056-
# ]
1050+
assert props["dict"].is_object()
1051+
for k, v in props["dict"].as_object().items():
1052+
assert isinstance(k, str)
1053+
assert isinstance(v, geojson.value)
1054+
assert type(x) == geojson.value
1055+
with pytest.raises(RuntimeError) as excinfo:
1056+
props["dict"].as_array()
1057+
assert "in get<T>()" in repr(excinfo)
1058+
assert props["dict"]() == {"key": 42, "value": 3.14}
1059+
assert props["dict"].as_object()() == {"key": 42, "value": 3.14}
1060+
assert list(props["dict"].keys()) in [
1061+
# order no guarantee (rapidjson has order, value(unordered_map) not)
1062+
["key", "value"],
1063+
["value", "key"],
1064+
]
10571065

10581066
d = props["double"]
10591067
assert d.GetType() == "double"
@@ -1072,13 +1080,13 @@ def test_geojson_feature():
10721080
i.GetInt64()
10731081
assert "in get<T>()" in repr(excinfo)
10741082

1075-
# i = props["int2"]
1076-
# assert i.GetType() == "int64_t"
1077-
# assert i.GetInt64() == -101
1078-
# assert isinstance(i.GetInt64(), int)
1079-
# with pytest.raises(RuntimeError) as excinfo:
1080-
# i.GetUint64()
1081-
# assert "in get<T>()" in repr(excinfo)
1083+
i = props["int2"]
1084+
assert i.GetType() == "int64_t"
1085+
assert i.GetInt64() == -101
1086+
assert isinstance(i.GetInt64(), int)
1087+
with pytest.raises(RuntimeError) as excinfo:
1088+
i.GetUint64()
1089+
assert "in get<T>()" in repr(excinfo)
10821090

10831091
props["new"] = 6
10841092
assert props["new"]() == 6

0 commit comments

Comments
 (0)