Skip to content

Commit eb3d5fa

Browse files
committed
ujson: do not eat trailing whitespace
Ujson should only worry about whitespace before the JSON. This becomes apparent when you use the MicroPython stream protocol to read directly from input buffers: when you attempt to read(1) on a UART (and possibly other protocols), you have to wait for either the byte or the timeout.

Fixes:
- Waiting for a timeout after you have finished reading a correct and complete JSON off the input.
- Raising an OSError after reading a correct and complete JSON off the input.
- Eating more data off the input buffer than the parser semantically owns.
- Blocking the start of JSON parsing until the entire JSON body has been loaded into a potentially large, contiguous Python object.

Code you would write before:
```
line = board_busio_uart_port.read_line()
json_dict = json.loads(line)
```
or reaching for fixed buffers and swapping them around in Python.

Code that did not work before but does now:
```
json_dict = json.load(board_busio_uart_port)
```

- This removes the need for intermediate copies of data when reading JSON from MicroPython stream protocol inputs.
- It also increases total application speed by parsing JSON concurrently with receiving on boards that read from UART via DMA.
- It simplifies the code that users write while improving their apps.
1 parent 90bd931 commit eb3d5fa

File tree

3 files changed

+25
-8
lines changed

3 files changed

+25
-8
lines changed

extmod/modujson.c

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ STATIC mp_obj_t mod_ujson_dumps(mp_obj_t obj) {
5353
}
5454
STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps);
5555

56+
#define JSON_DEBUG(...) (void)0
57+
// #define JSON_DEBUG(...) mp_printf(&mp_plat_print __VA_OPT__(,) __VA_ARGS__)
58+
59+
5660
// The function below implements a simple non-recursive JSON parser.
5761
//
5862
// The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt
@@ -80,6 +84,7 @@ typedef struct _ujson_stream_t {
8084

8185
STATIC byte ujson_stream_next(ujson_stream_t *s) {
8286
mp_uint_t ret = s->read(s->stream_obj, &s->cur, 1, &s->errcode);
87+
JSON_DEBUG(" usjon_stream_next err:%2d cur: %c \n", s->errcode, s->cur);
8388
if (s->errcode != 0) {
8489
mp_raise_OSError(s->errcode);
8590
}
@@ -92,6 +97,7 @@ STATIC byte ujson_stream_next(ujson_stream_t *s) {
9297
STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) {
9398
const mp_stream_p_t *stream_p = mp_get_stream_raise(stream_obj, MP_STREAM_OP_READ);
9499
ujson_stream_t s = {stream_obj, stream_p->read, 0, 0};
100+
JSON_DEBUG("got JSON stream\n");
95101
vstr_t vstr;
96102
vstr_init(&vstr, 8);
97103
mp_obj_list_t stack; // we use a list as a simple stack for nested JSON
@@ -101,6 +107,15 @@ STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) {
101107
mp_obj_type_t *stack_top_type = NULL;
102108
mp_obj_t stack_key = MP_OBJ_NULL;
103109
S_NEXT(s);
110+
// Eat _leading_ whitespace.
111+
// If we eat trailing whitespace we will block for timeout on streams like UART that
112+
// wait for requested data. Furthermore, it is an OSError to read(1) and incur
113+
// a timeout on those APIs.
114+
// For these reasons, we must only eat _leading_ whitespace.
115+
while (unichar_isspace(S_CUR(s))) {
116+
JSON_DEBUG("Eating leading whitespace");
117+
S_NEXT(s);
118+
}
104119
for (;;) {
105120
cont:
106121
if (S_END(s)) {
@@ -262,14 +277,9 @@ STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) {
262277
}
263278
}
264279
success:
265-
// eat trailing whitespace
266-
while (unichar_isspace(S_CUR(s))) {
267-
S_NEXT(s);
268-
}
269-
if (!S_END(s)) {
270-
// unexpected chars
271-
goto fail;
272-
}
280+
// It is legal for a stream to have contents before and after JSON.
281+
// If this parser has consumed a full successful JSON and its parse
282+
// stack is empty, the parse has succeeded.
273283
if (stack_top == MP_OBJ_NULL || stack.len != 0) {
274284
// not exactly 1 object
275285
goto fail;

ports/atmel-samd/common-hal/busio/UART.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@
4545

4646
#include "samd/sercom.h"
4747

48+
#define UART_DEBUG(...) (void)0
49+
// #define UART_DEBUG(...) mp_printf(&mp_plat_print __VA_OPT__(,) __VA_ARGS__)
50+
4851
// Do-nothing callback needed so that usart_async code will enable rx interrupts.
4952
// See comment below re usart_async_register_callback()
5053
static void usart_async_rxc_callback(const struct usart_async_descriptor *const descr) {

shared-bindings/busio/UART.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939
#include "py/stream.h"
4040
#include "supervisor/shared/translate.h"
4141

42+
#define STREAM_DEBUG(...) (void)0
43+
// #define STREAM_DEBUG(...) mp_printf(&mp_plat_print __VA_OPT__(,) __VA_ARGS__)
44+
4245

4346
//| .. currentmodule:: busio
4447
//|
@@ -219,6 +222,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(busio_uart___exit___obj, 4, 4, busio_
219222

220223
// These three methods are used by the shared stream methods.
221224
STATIC mp_uint_t busio_uart_read(mp_obj_t self_in, void *buf_in, mp_uint_t size, int *errcode) {
225+
STREAM_DEBUG("busio_uart_read stream %d\n", size);
222226
busio_uart_obj_t *self = MP_OBJ_TO_PTR(self_in);
223227
check_for_deinit(self);
224228
byte *buf = buf_in;

0 commit comments

Comments
 (0)