Skip to content

Commit 88d4511

Browse files
committed
Merge pull request atomvm#492 from fadushin/small_big_ext
Add support for SMALL_BIG_EXT term encoding This PR adds support for the SMALL_BIG_EXT term encoding, supporting encoding and decoding of all integer sizes currently supported by AtomVM. These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license). SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents 5ad6e37 + b989e98 commit 88d4511

File tree

4 files changed

+204
-6
lines changed

4 files changed

+204
-6
lines changed

src/libAtomVM/externalterm.c

Lines changed: 86 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#define STRING_EXT 107
4141
#define LIST_EXT 108
4242
#define BINARY_EXT 109
43+
#define SMALL_BIG_EXT 110
4344
#define EXPORT_EXT 113
4445
#define MAP_EXT 116
4546
#define SMALL_ATOM_UTF8_EXT 119
@@ -48,13 +49,17 @@
4849
#define NEW_FLOAT_EXT_SIZE 9
4950
#define SMALL_INTEGER_EXT_SIZE 2
5051
#define INTEGER_EXT_SIZE 5
52+
#define SMALL_BIG_EXT_BASE_SIZE 3
5153
#define ATOM_EXT_BASE_SIZE 3
5254
#define STRING_EXT_BASE_SIZE 3
5355
#define LIST_EXT_BASE_SIZE 5
5456
#define BINARY_EXT_BASE_SIZE 5
5557
#define MAP_EXT_BASE_SIZE 5
5658
#define SMALL_ATOM_EXT_BASE_SIZE 2
5759

60+
// Assuming two's-complement implementation of signed integers
61+
#define SIGNED_INT_TO_UNSIGNED(val, unsigned_type) ((val) < 0 ? ~((unsigned_type) (val)) + 1 : (val))
62+
5863
// MAINTENANCE NOTE. Range checking on the external term buffer is only performed in
5964
// the calculate_heap_usage function, which will fail with an invalid term if there is
6065
// insufficient space in the external term buffer (preventing reading off the end of the
@@ -202,6 +207,27 @@ static size_t compute_external_size(Context *ctx, term t)
202207
return serialize_term(ctx, NULL, t);
203208
}
204209

210+
// Returns the minimum number of bytes needed to represent val, i.e. the
// position of its most significant non-zero byte.  Yields 0 when val is 0.
static uint8_t get_num_bytes(avm_uint64_t val)
{
    uint8_t count;
    // Drop one byte per iteration until nothing is left.
    for (count = 0; val != 0; val >>= 8) {
        count++;
    }
    return count;
}
219+
220+
// Stores val into buf in little-endian order (least significant byte first),
// stopping after the most significant non-zero byte.  Nothing is written when
// val is 0.  The caller must supply a buffer large enough for the bytes
// emitted (get_num_bytes(val) of them).
static void write_bytes(uint8_t *buf, avm_uint64_t val)
{
    for (uint8_t *out = buf; val != 0; val >>= 8) {
        *out++ = (uint8_t) (val & 0xFF);
    }
}
230+
205231
static int serialize_term(Context *ctx, uint8_t *buf, term t)
206232
{
207233
if (term_is_uint8(t)) {
@@ -211,13 +237,26 @@ static int serialize_term(Context *ctx, uint8_t *buf, term t)
211237
}
212238
return 2;
213239

214-
} else if (term_is_integer(t)) {
215-
if (!IS_NULL_PTR(buf)) {
216-
int32_t val = term_to_int32(t);
217-
buf[0] = INTEGER_EXT;
218-
WRITE_32_UNALIGNED(buf + 1, val);
240+
} else if (term_is_any_integer(t)) {
241+
242+
avm_int64_t val = term_maybe_unbox_int64(t);
243+
if (val >= INT32_MIN && val <= INT32_MAX) {
244+
if (buf != NULL) {
245+
buf[0] = INTEGER_EXT;
246+
WRITE_32_UNALIGNED(buf + 1, (int32_t) val);
247+
}
248+
return INTEGER_EXT_SIZE;
249+
} else {
250+
avm_uint64_t unsigned_val = SIGNED_INT_TO_UNSIGNED(val, avm_uint64_t);
251+
uint8_t num_bytes = get_num_bytes(unsigned_val);
252+
if (buf != NULL) {
253+
buf[0] = SMALL_BIG_EXT;
254+
buf[1] = num_bytes;
255+
buf[2] = val < 0 ? 0x01 : 0x00;
256+
write_bytes(buf + 3, unsigned_val);
257+
}
258+
return SMALL_BIG_EXT_BASE_SIZE + num_bytes;
219259
}
220-
return 5;
221260

222261
} else if (term_is_float(t)) {
223262
if (!IS_NULL_PTR(buf)) {
@@ -341,6 +380,15 @@ static int serialize_term(Context *ctx, uint8_t *buf, term t)
341380
}
342381
}
343382

383+
static avm_uint64_t read_bytes(const uint8_t *buf, uint8_t num_bytes)
384+
{
385+
avm_uint64_t value = 0;
386+
for (uint8_t i = 0; i < num_bytes; ++i) {
387+
value |= (((avm_uint64_t) buf[i]) << (i * 8));
388+
}
389+
return value;
390+
}
391+
344392
static term parse_external_terms(const uint8_t *external_term_buf, int *eterm_size, Context *ctx, bool copy)
345393
{
346394
switch (external_term_buf[0]) {
@@ -368,6 +416,23 @@ static term parse_external_terms(const uint8_t *external_term_buf, int *eterm_si
368416
return term_make_maybe_boxed_int64(ctx, value);
369417
}
370418

419+
case SMALL_BIG_EXT: {
420+
uint8_t num_bytes = external_term_buf[1];
421+
uint8_t sign = external_term_buf[2];
422+
avm_uint64_t unsigned_value = read_bytes(external_term_buf + 3, num_bytes);
423+
// NB due to call to calculate_heap_usage, there is no loss of precision:
424+
// 1. 0 <= unsigned_value <= INT64_MAX if sign is 0
425+
// 2. 0 <= unsigned_value <= INT64_MAX + 1 if sign is not 0
426+
avm_int64_t value = 0;
427+
if (sign != 0x00) {
428+
value = -((avm_int64_t) unsigned_value);
429+
} else {
430+
value = (avm_int64_t) unsigned_value;
431+
}
432+
*eterm_size = SMALL_BIG_EXT_BASE_SIZE + num_bytes;
433+
return term_make_maybe_boxed_int64(ctx, value);
434+
}
435+
371436
case ATOM_EXT: {
372437
uint16_t atom_len = READ_16_UNALIGNED(external_term_buf + 1);
373438

@@ -549,6 +614,21 @@ static int calculate_heap_usage(const uint8_t *external_term_buf, size_t remaini
549614
return term_boxed_integer_size(value);
550615
}
551616

617+
case SMALL_BIG_EXT: {
618+
uint8_t num_bytes = external_term_buf[1];
619+
if (UNLIKELY(num_bytes > 8 || remaining < (SMALL_BIG_EXT_BASE_SIZE + num_bytes))) {
620+
return INVALID_TERM_SIZE;
621+
}
622+
uint8_t sign = external_term_buf[2];
623+
*eterm_size = SMALL_BIG_EXT_BASE_SIZE + num_bytes;
624+
avm_uint64_t value = read_bytes(external_term_buf + 3, num_bytes);
625+
// NB. We currently support max 64-bit signed integers (assuming two's complement signed values in 63 bits)
626+
if (UNLIKELY((sign == 0 && value > INT64_MAX) || (sign != 0 && value > (((avm_uint64_t) INT64_MAX) + 1)))) {
627+
return INVALID_TERM_SIZE;
628+
}
629+
return term_boxed_integer_size(value);
630+
}
631+
552632
case ATOM_EXT: {
553633
if (UNLIKELY(remaining < ATOM_EXT_BASE_SIZE)) {
554634
return INVALID_TERM_SIZE;

tests/erlang_tests/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,7 @@ compile_erlang(unlink_error)
439439
compile_erlang(trap_exit_flag)
440440

441441
compile_erlang(test_stacktrace)
442+
compile_erlang(small_big_ext)
442443

443444
add_custom_target(erlang_test_modules DEPENDS
444445
add.beam
@@ -853,4 +854,5 @@ add_custom_target(erlang_test_modules DEPENDS
853854
trap_exit_flag.beam
854855

855856
test_stacktrace.beam
857+
small_big_ext.beam
856858
)
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
%
2+
% This file is part of AtomVM.
3+
%
4+
% Copyright 2023 Fred Dushin <[email protected]>
5+
%
6+
% Licensed under the Apache License, Version 2.0 (the "License");
7+
% you may not use this file except in compliance with the License.
8+
% You may obtain a copy of the License at
9+
%
10+
% http://www.apache.org/licenses/LICENSE-2.0
11+
%
12+
% Unless required by applicable law or agreed to in writing, software
13+
% distributed under the License is distributed on an "AS IS" BASIS,
14+
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
% See the License for the specific language governing permissions and
16+
% limitations under the License.
17+
%
18+
% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
19+
%
20+
21+
-module(small_big_ext).
22+
23+
-export([start/0]).
24+
25+
-define(INT64_MAX, 9223372036854775807).
26+
-define(INT64_MIN, -9223372036854775808).
27+
28+
%% Exercises encoding and decoding of the SMALL_BIG_EXT (tag 110) external
%% term format and returns 0 when every check passes.  Any failed check
%% crashes with a badmatch.
start() ->
    true = test_reverse(pow(32), <<131, 110, 5, 0, 0, 0, 0, 0, 1>>),
    true = test_reverse(pow(32) + 1, <<131, 110, 5, 0, 1, 0, 0, 0, 1>>),
    true = test_reverse(pow(60) + 1, <<131, 110, 8, 0, 1, 0, 0, 0, 0, 0, 0, 16>>),
    true = test_reverse(-1 * (pow(60) + 1), <<131, 110, 8, 1, 1, 0, 0, 0, 0, 0, 0, 16>>),
    true = test_reverse(?INT64_MAX, <<131, 110, 8, 0, 255, 255, 255, 255, 255, 255, 255, 127>>),
    true = test_reverse(?INT64_MIN, <<131, 110, 8, 1, 0, 0, 0, 0, 0, 0, 0, 128>>),

    %% we can still decode really small encodings
    %% (any non-zero sign byte is expected to decode as negative; the -1
    %% segment below is truncated to the byte 255)
    1 = erlang:binary_to_term(<<131, 110, 1, 0, 1>>),
    -1 = erlang:binary_to_term(<<131, 110, 1, 1, 1>>),
    -1 = erlang:binary_to_term(<<131, 110, 1, 3, 1>>),
    -1 = erlang:binary_to_term(<<131, 110, 1, -1, 1>>),

    %% 0-length encodings are legal
    0 = erlang:binary_to_term(<<131, 110, 0, 0>>),
    0 = erlang:binary_to_term(<<131, 110, 0, 1>>),

    %% edge cases around INT32 min/max boundaries
    true = test_reverse(pow(31) - 1, <<131, 98, 127, 255, 255, 255>>),
    true = test_reverse(pow(31), <<131, 110, 4, 0, 0, 0, 0, 128>>),
    true = test_reverse(-pow(31), <<131, 98, 128, 0, 0, 0>>),
    true = test_reverse(-pow(31) - 1, <<131, 110, 4, 1, 1, 0, 0, 128>>),

    %% missing sign
    ok = assert_badarg(
        fun() ->
            erlang:binary_to_term(<<131, 110, 0>>)
        end
    ),

    %% we currently only support up to 64 bit (signed) integers
    case erlang:system_info(machine) of
        "BEAM" ->
            %% Assert the round-trip result here as well, so that a false
            %% return from test_reverse/2 cannot pass unnoticed.
            true = test_reverse(
                pow(63) + 1, <<131, 110, 8, 0, 1, 0, 0, 0, 0, 0, 0, 128>>
            ),
            true = test_reverse(
                -(pow(63) + 2), <<131, 110, 8, 1, 2, 0, 0, 0, 0, 0, 0, 128>>
            ),
            true = test_reverse(
                pow(128), <<131, 110, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1>>
            );
        _ ->
            %% On other machines these out-of-range encodings must be
            %% rejected with badarg.
            ok = assert_badarg(
                fun() ->
                    erlang:binary_to_term(
                        <<131, 110, 8, 0, 1, 0, 0, 0, 0, 0, 0, 128>>
                    )
                end
            ),
            ok = assert_badarg(
                fun() ->
                    erlang:binary_to_term(
                        <<131, 110, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1>>
                    )
                end
            ),
            ok = assert_badarg(
                fun() ->
                    erlang:binary_to_term(
                        <<131, 110, 8, 1, 2, 0, 0, 0, 0, 0, 0, 128>>
                    )
                end
            )
    end,
    0.
95+
96+
%% Round-trips Term through the external term format.
%% Crashes with badmatch unless term_to_binary/1 yields exactly Expected and
%% binary_to_term/2 reports having consumed the whole binary.  Returns whether
%% the decoded term is exactly equal to Term.
test_reverse(Term, Expected) ->
    Encoded = erlang:term_to_binary(Term),
    Encoded = Expected,
    {Decoded, Consumed} = erlang:binary_to_term(Encoded, [used]),
    Consumed = erlang:byte_size(Encoded),
    Decoded =:= Term.
102+
103+
%% Calls Fun/0 and returns ok if it raises error:badarg.
%% If Fun returns normally, yields {fail_no_ex, Result} instead; any other
%% exception propagates to the caller.
assert_badarg(Fun) ->
    try Fun() of
        Result -> {fail_no_ex, Result}
    catch
        error:badarg -> ok
    end.
110+
111+
%% Computes 2 to the power N for a non-negative integer N by doubling
%% (shifting left by one bit) N times.
pow(0) -> 1;
pow(N) -> pow(N - 1) bsl 1.

tests/test.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,7 @@ struct Test tests[] = {
475475
TEST_CASE_EXPECTED(unlink_error, 1),
476476
TEST_CASE_EXPECTED(trap_exit_flag, 1),
477477
TEST_CASE_COND(test_stacktrace, 0, SKIP_STACKTRACES),
478+
TEST_CASE(small_big_ext),
478479

479480
// TEST CRASHES HERE: TEST_CASE(memlimit),
480481

0 commit comments

Comments
 (0)