Skip to content

Commit 8a835f9

Browse files
authored
Add json implementation for to_bytes function. (#8723)
The query `select to_bytes(to_json("[1]"))` produces `{b'[1]'}`.
1 parent 414e561 commit 8a835f9

File tree

4 files changed

+69
-1
lines changed

4 files changed

+69
-1
lines changed

docs/reference/stdlib/bytes.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ Bytes
193193
.. TODO: Function signatures except the first need to be revealed only for v5+
194194
195195
.. eql:function:: std::to_bytes(s: str) -> bytes
196+
std::to_bytes(j: json) -> bytes
196197
std::to_bytes(val: int16, endian: Endian) -> bytes
197198
std::to_bytes(val: int32, endian: Endian) -> bytes
198199
std::to_bytes(val: int64, endian: Endian) -> bytes
@@ -211,6 +212,13 @@ Bytes
211212
db> select to_bytes('テキスト');
212213
{b'\xe3\x83\x86\xe3\x82\xad\xe3\x82\xb9\xe3\x83\x88'}
213214
215+
JSON values are converted to their string representation and encoded as UTF-8:
216+
217+
.. code-block:: edgeql-repl
218+
219+
db> select to_bytes(to_json('{"a": 1}'));
220+
{b'{"a": 1}'}
221+
214222
The integer values can be encoded as big-endian (most significant byte
215223
comes first) byte strings:
216224

edb/buildmeta.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
# The merge conflict there is a nice reminder that you probably need
5858
# to write a patch in edb/pgsql/patches.py, and then you should preserve
5959
# the old value.
60-
EDGEDB_CATALOG_VERSION = 2025_05_12_00_00
60+
EDGEDB_CATALOG_VERSION = 2025_05_20_00_00
6161
EDGEDB_MAJOR_VERSION = 7
6262

6363

edb/lib/std/70-converters.edgeql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,15 @@ std::to_bytes(s: std::str) -> std::bytes {
270270
};
271271

272272

273+
# JSON overload of std::to_bytes: returns the textual form of the json
# value as bytes.
CREATE FUNCTION
std::to_bytes(j: std::json) -> std::bytes {
    CREATE ANNOTATION std::description :=
        'Convert a json value to a binary UTF-8 string.';
    SET volatility := 'Immutable';
    # Implemented by composition: the json value is first rendered with
    # to_str(), and the resulting text is passed back to to_bytes()
    # (presumably resolving to the std::str overload defined above).
    USING (to_bytes(to_str(j)));
};
280+
281+
273282
CREATE SCALAR TYPE
274283
std::Endian EXTENDING enum<Little, Big>;
275284

tests/test_edgeql_functions.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3631,6 +3631,57 @@ async def test_edgeql_functions_string_bytes_conversion_error(self):
36313631
''',
36323632
)
36333633

3634+
async def test_edgeql_functions_json_bytes_conversion(self):
    """Check that ``to_bytes(<json>)`` returns the JSON text as UTF-8 bytes.

    Each case is a pair of JSON documents: the text that is parsed with
    ``to_json`` and the text whose UTF-8 encoding ``to_bytes`` is expected
    to produce for the parsed value.
    """
    ascii_doc = {"a": [1, 2, 3], "b": "foo"}
    unicode_doc = {"數": [1, 2, 3], "言": "你好世界!"}

    # json.dumps escapes non-ASCII characters unless ensure_ascii=False is
    # passed. For example, the character '數' (U+6578) is written as the
    # ASCII escape \u6578 instead of the UTF-8 bytes b'\xe6\x95\xb8'.
    # Both spellings must parse to the same json value and come back from
    # to_bytes as the unescaped, UTF-8 encoded text.
    escaped_text = json.dumps(unicode_doc, ensure_ascii=True)
    literal_text = json.dumps(unicode_doc, ensure_ascii=False)

    cases = [
        (json.dumps(ascii_doc), json.dumps(ascii_doc)),
        (escaped_text, literal_text),
        (literal_text, literal_text),
    ]

    # The locals deliberately avoid the name `input`, which would shadow
    # the builtin; the query variables keep their original names.
    for source_text, expected_text in cases:
        await self.assert_query_result(
            r'''
                WITH
                    input := <bytes>$input,
                    as_json := to_json(to_str(input)),
                    as_bytes := to_bytes(as_json),
                SELECT
                    as_bytes = <bytes>$expected;
            ''',
            {True},
            variables={
                "input": source_text.encode("utf-8"),
                "expected": expected_text.encode("utf-8"),
            },
        )
3684+
36343685
async def test_edgeql_functions_int_bytes_conversion_01(self):
36353686
# Make sure we can convert the bytes to ints and back
36363687
for num in range(256):

0 commit comments

Comments
 (0)