Skip to content

Commit ab1afaa

Browse files
authored
Add JSONBuilder.string_with_len() method (#1244)
1 parent 54bf364 commit ab1afaa

File tree

3 files changed

+56
-23
lines changed

3 files changed

+56
-23
lines changed

doc/json-building.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ To actually build the JSON, use the following methods, where `jb` is a `JSONBuil
8282
- `jb.string(s: byte*)` adds a string to the JSON.
8383
If `s` is `NULL`, it instead adds `null` just like `jb.null()` would.
8484
See also [the notes about strings below](#notes-about-strings).
85+
- `jb.string_with_len(s: byte*, len: intnative)` is like `jb.string(s)` except that the string may contain zero bytes.
86+
Instead of looking for a zero byte to figure out where the string ends,
87+
the length in bytes is given as an argument.
8588
- `jb.begin_array()` and `jb.end_array()` are used to build a JSON array.
8689
Between calling these methods, you build each item of the array.
8790
See also [the notes about arrays and objects below](#notes-about-arrays-and-objects).
@@ -133,10 +136,6 @@ There is a similar problem with [parsing JSON](json-parsing.md#notes-about-numbe
133136

134137
## Notes about strings
135138

136-
It is currently not possible to add a string containing the zero byte `\0` to JSON.
137-
This would be easy to implement if needed, so
138-
please [create an issue on GitHub](https://github.com/Akuli/jou/issues/new) if you need this.
139-
140139
The string given to `JSONBuilder.string()` or `JSONBuilder.string_with_len()` should be valid UTF-8.
141140
If it isn't, the resulting JSON will simply contain the given invalid UTF-8.
142141

stdlib/json.jou

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import "stdlib/ascii.jou"
22
import "stdlib/assert.jou"
3+
import "stdlib/intnative.jou"
34
import "stdlib/list.jou"
45
import "stdlib/mem.jou"
56
import "stdlib/str.jou"
@@ -85,7 +86,7 @@ class JSONBuilder:
8586

8687
self.add_comma_if_needed()
8788
self.new_line()
88-
self.add_string_to_output(key)
89+
self.add_string_to_output(key, strlen(key))
8990
self.output.append(':')
9091
if self.pretty_print != 0:
9192
self.output.append(' ')
@@ -127,49 +128,55 @@ class JSONBuilder:
127128
self.output.extend_from_ptr("false", 5)
128129

129130
@public
130-
def string(self, s: byte*) -> None:
131+
def string_with_len(self, s: byte*, len: intnative) -> None:
131132
if s == NULL:
132133
self.null()
133134
else:
135+
assert len >= 0
134136
self.before_a_value()
135-
self.add_string_to_output(s)
137+
self.add_string_to_output(s, len)
138+
139+
@public
140+
def string(self, s: byte*) -> None:
141+
self.string_with_len(s, 0 if s == NULL else strlen(s))
136142

137143
# This method is used for the .string_with_len() method (and therefore .string()) and the .key() method.
138-
def add_string_to_output(self, s: byte*) -> None:
144+
def add_string_to_output(self, s: byte*, len: intnative) -> None:
139145
self.output.append('"')
140-
while *s != '\0':
141-
match *s:
146+
i: intnative = 0
147+
while i < len:
148+
match s[i]:
142149
case '\n':
143150
self.output.extend_from_ptr("\\n", 2)
144-
s++
151+
i++
145152
case '\t':
146153
self.output.extend_from_ptr("\\t", 2)
147-
s++
154+
i++
148155
case '\r':
149156
self.output.extend_from_ptr("\\r", 2)
150-
s++
157+
i++
151158
case '"' | '\\':
152159
self.output.append('\\')
153-
self.output.append(*s++)
160+
self.output.append(s[i++])
154161
case _:
155-
if is_ascii_printable(*s):
156-
self.output.append(*s++)
157-
elif *s < 128:
162+
if is_ascii_printable(s[i]):
163+
self.output.append(s[i++])
164+
elif s[i] < 128:
158165
# unprintable ASCII character, do these carefully
159166
buf: byte[16]
160-
sprintf(buf, "\\u00%02x", *s++)
167+
sprintf(buf, "\\u00%02x", s[i++])
161168
self.output.extend_from_ptr(buf, strlen(buf))
162-
elif starts_with(s, "\xe2\x80\xa8"):
169+
elif len-i >= 3 and starts_with(&s[i], "\xe2\x80\xa8"):
163170
# Unicode character U+2028, placing it directly into JSON may cause problems
164171
self.output.extend_from_ptr("\\u2028", 6)
165-
s = &s[3]
166-
elif starts_with(s, "\xe2\x80\xa9"):
172+
i += 3
173+
elif len-i >= 3 and starts_with(&s[i], "\xe2\x80\xa9"):
167174
# Unicode character U+2029, placing it directly into JSON may cause problems
168175
self.output.extend_from_ptr("\\u2029", 6)
169-
s = &s[3]
176+
i += 3
170177
else:
171178
# part of a non-ASCII character in UTF-8, output it as is
172-
self.output.append(*s++)
179+
self.output.append(s[i++])
173180
self.output.append('"')
174181

175182
@public

tests/should_succeed/json_test_builder.jou

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,33 @@ def test_strings() -> None:
147147
printf("%d\n", strcmp(json, "\"non-ASCII like €£💸 is valid as is\"")) # Output: 0
148148
free(json)
149149

150+
# String that contains zero bytes
151+
hello0world = ['h', 'e', 'l', 'l', 'o', '\0', 'w', 'o', 'r', 'l', 'd']
152+
corner_case: byte[7] = ['w', 'a', 't', '\0', '\xe2', '\x80', '\xa9']
153+
jb = JSONBuilder{}
154+
jb.begin_array()
155+
jb.string_with_len(hello0world, 11)
156+
jb.string_with_len("test", 5)
157+
jb.string_with_len(corner_case, 7)
158+
jb.end_array()
159+
json = jb.finish()
160+
puts(json) # Output: ["hello\u0000world","test\u0000","wat\u0000\u2029"]
161+
free(json)
162+
163+
# Invalid UTF-8 is passed through as is. The check for U+2028/U+2029
164+
# shouldn't read beyond the end of the string even in this case.
165+
#
166+
# Using malloc() so any problems can hopefully be found with valgrind.
167+
s: byte* = malloc(2)
168+
s[0] = '\xe2'
169+
s[1] = '\x80'
170+
jb = JSONBuilder{}
171+
jb.string_with_len(s, 2)
172+
free(s)
173+
json = jb.finish()
174+
printf("%d\n", strcmp(json, "\"\xe2\x80\"")) # Output: 0
175+
free(json)
176+
150177

151178
def test_numbers() -> None:
152179
jb = JSONBuilder{pretty_print = 2}

0 commit comments

Comments
 (0)