Skip to content

Commit 970bee7

Browse files
authored
Centralize orjson usage across library (#435)
* Replace `orjson.loads` calls with the serializer's `load_json` method
* Remove direct orjson references from the web package
* Centralize orjson library use; extend the serializer `to_json` function to return bytes when `return_bytes=True` is specified
1 parent 0cf3a49 commit 970bee7

File tree

10 files changed

+34
-29
lines changed

10 files changed

+34
-29
lines changed

biothings/hub/api/handlers/base.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
# see https://github.com/biothings/biothings.api/commit/59c0d78f758018b0d87836657a2b5d1a700503a1
77
# import pandas.io.json as pdjson
88
# replace pandas json encoder with orjson:
9-
import orjson
109
from tornado.web import RequestHandler
1110

1211
from biothings import config
12+
from biothings.utils import serializer
1313

1414

1515
class DefaultHandler(RequestHandler):
@@ -26,10 +26,9 @@ def write(self, result):
2626
# "result": result,
2727
# "status": "ok"
2828
# }, iso_dates=True)
29-
orjson.dumps(
29+
serializer.to_json({
3030
{"result": result, "status": "ok"},
31-
option=orjson.OPT_NON_STR_KEYS | orjson.OPT_NAIVE_UTC,
32-
).decode()
31+
})
3332
)
3433

3534
def write_error(self, status_code, **kwargs):

biothings/hub/dataload/dumper.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Union
2121
from urllib import parse as urlparse
2222

23+
from biothings.utils import serializer
24+
2325
try:
2426
import docker
2527
from docker.errors import ImageNotFound, NotFound, NullResource
@@ -28,7 +30,6 @@
2830
except ImportError:
2931
docker_avail = False
3032

31-
import orjson
3233
import requests
3334

3435
from biothings import config as btconfig
@@ -1883,7 +1884,7 @@ def _run_api_and_store_to_disk(
18831884
try:
18841885
for filename, obj in fn():
18851886
fn_byte_arr = buffer.setdefault(filename, bytearray())
1886-
fn_byte_arr.extend(orjson.dumps(obj) + b"\n")
1887+
fn_byte_arr.extend(serializer.to_json(obj, return_bytes=True) + b"\n")
18871888
if len(fn_byte_arr) >= buffer_size:
18881889
with open(f"{filename}.{pid}", "ab") as f:
18891890
f.write(fn_byte_arr)

biothings/utils/dotfield.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import orjson
1+
from biothings.utils import serializer
22

33

44
def make_object(attr, value):
@@ -21,10 +21,9 @@ def make_object(attr, value):
2121
# s += "}" * (len(attr_list))
2222
# return json.loads(s)
2323

24-
# New implementation using orjson module
25-
s += orjson.dumps(value).decode("utf-8") # decoding is necessary because orjson dumps into bytes
24+
s += serializer.to_json(value)
2625
s += "}" * (len(attr_list))
27-
return orjson.loads(s)
26+
return serializer.load_json(s)
2827

2928

3029
def merge_object(obj1, obj2):

biothings/utils/parsers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from typing import Callable, Generator, Iterable, Optional
44
from urllib.parse import parse_qsl, urlparse
55

6-
import orjson
6+
from biothings.utils import serializer
77

88

99
def ndjson_parser(
@@ -31,7 +31,7 @@ def ndjson_parser_func(data_folder):
3131
for filename in work_dir.glob(pattern):
3232
with open(filename, "rb") as f:
3333
for line in f:
34-
doc = orjson.loads(line)
34+
doc = serializer.load_json(line)
3535
yield doc
3636

3737
return ndjson_parser_func
@@ -60,7 +60,7 @@ def json_array_parser(data_folder):
6060
for pattern in patterns:
6161
for filename in work_dir.glob(pattern):
6262
with open(filename, "r") as f:
63-
data = orjson.loads(f.read())
63+
data = serializer.load_json(f.read())
6464
try:
6565
iterator = iter(data)
6666
except TypeError:

biothings/utils/serializer.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def orjson_default(o):
2828
raise TypeError(f"Type {type(o)} not serializable")
2929

3030

31-
def to_json(data, indent=False, sort_keys=False):
31+
def to_json(data, indent=False, sort_keys=False, return_bytes=False):
3232
# default option:
3333
# OPT_NON_STR_KEYS: non string dictionary key, e.g. integer
3434
# OPT_NAIVE_UTC: use UTC as the timezone when it's missing
@@ -37,7 +37,12 @@ def to_json(data, indent=False, sort_keys=False):
3737
option |= orjson.OPT_INDENT_2
3838
if sort_keys:
3939
option |= orjson.OPT_SORT_KEYS
40-
return orjson.dumps(data, default=orjson_default, option=option).decode()
40+
41+
byte_dump = orjson.dumps(data, default=orjson_default, option=option)
42+
if return_bytes:
43+
return byte_dump
44+
45+
return byte_dump.decode()
4146

4247

4348
def to_json_file(data, fobj, indent=False, sort_keys=False):

biothings/web/analytics/channels.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
import asyncio
33
import certifi
44
import logging
5-
import orjson
65
import ssl
76

7+
from biothings.utils import serializer
88
from biothings.web.analytics.events import Event, Message
99

1010

@@ -81,7 +81,7 @@ async def send(self, event):
8181
"user_id": str(event._cid(1)),
8282
"events": events[i : i + 25],
8383
}
84-
await self.send_request(session, self.url, orjson.dumps(data))
84+
await self.send_request(session, self.url, serializer.to_json(data, return_bytes=True))
8585

8686
async def send_request(self, session, url, data):
8787
retries = 0

biothings/web/handlers/base.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
"""
2121
import logging
2222

23-
import orjson
2423
import yaml
2524
from tornado.web import HTTPError, RequestHandler
2625

@@ -105,8 +104,8 @@ def _parse_json(self):
105104
if not self.request.body:
106105
return {}
107106
try:
108-
return orjson.loads(self.request.body)
109-
except orjson.JSONDecodeError:
107+
return serializer.load_json(self.request.body)
108+
except serializer.JSONDecodeError:
110109
raise HTTPError(400, reason="Invalid JSON body.")
111110

112111
def _parse_yaml(self):

biothings/web/options/manager.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from types import MappingProxyType
1111

1212
import jmespath
13-
import orjson
13+
from biothings.utils import serializer
1414

1515
try:
1616
from re import Pattern # py>=3.7
@@ -250,8 +250,8 @@ def __init__(self, **kwargs):
250250
def convert_to(self, value, to_type):
251251
if self.jsoninput:
252252
try: # attempt to load as json first
253-
_value = orjson.loads(value)
254-
except orjson.JSONDecodeError as exc:
253+
_value = serializer.load_json(value)
254+
except serializer.JSONDecodeError as exc:
255255
logging.debug(repr(exc))
256256
else: # no more conversions
257257
if isinstance(_value, to_type):

biothings/web/query/builder.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ class implementations or not defined.
4040

4141
from elasticsearch.dsl import MultiSearch, Q, Search
4242
from elasticsearch.dsl.exceptions import IllegalOperation
43-
import orjson
4443

44+
from biothings.utils import serializer
4545
from biothings.utils.common import dotdict
4646
from biothings.web.query.formatter import ESResultFormatter
4747
from biothings.web.services.metadata import BiothingsMetadata
@@ -430,9 +430,9 @@ def __init__(self, path):
430430
## alternative implementation # noqa: E266
431431
# self._queries[os.path.basename(dirpath)] = text_file.read()
432432
##
433-
self._queries[os.path.basename(dirpath)] = orjson.loads(text_file.read())
433+
self._queries[os.path.basename(dirpath)] = serializer.load_json(text_file.read())
434434
elif "filter" in filename:
435-
self._filters[os.path.basename(dirpath)] = orjson.loads(text_file.read())
435+
self._filters[os.path.basename(dirpath)] = serializer.load_json(text_file.read())
436436
except Exception:
437437
self.logger.exception("Error loading user queries.")
438438

tests/web/analytics/test_channels.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import aiohttp
22
import asyncio
3-
import orjson
43
import pytest
54

65
from aioresponses import aioresponses
6+
7+
from biothings.utils import serializer
78
from biothings.web.analytics.channels import SlackChannel, GA4Channel, GAChannel
89
from biothings.web.analytics.events import GAEvent, Message
910
from unittest.mock import patch
@@ -91,7 +92,8 @@ async def test_send_GA4():
9192
async def test_send_GA4_request_retries():
9293
channel = GA4Channel("G-XXXXXX", "SECRET")
9394
url = channel.url
94-
data = orjson.dumps({"test": "data"})
95+
# data = orjson.dumps({"test": "data"})
96+
data = serializer.to_json({"test": "data"}, return_bytes=True)
9597

9698
async with aiohttp.ClientSession() as session:
9799
with aioresponses() as responses:
@@ -109,7 +111,7 @@ async def test_send_GA4_request_retries():
109111
async def test_send_GA4_request_max_retries():
110112
channel = GA4Channel("G-XXXXXX", "SECRET")
111113
url = channel.url
112-
data = orjson.dumps({"test": "data"})
114+
data = serializer.to_json({"test": "data"}, return_bytes=True)
113115

114116
async with aiohttp.ClientSession() as session:
115117
with aioresponses() as responses:

0 commit comments

Comments
 (0)